Remove unaligned functions, since most function support unaligned memory now. This reduces complexity and improves performance for unaligned cases because C code can be avoided, and overhead is less. Downside is old cpus (core2 and earlier) will be slower for aligned memory case. Except mips, which has alignment requirement, but remove unaligned variant.

BUG=365
TESTED=unittest builds and passes locally
R=harryjin@google.com

Review URL: https://webrtc-codereview.appspot.com/24839004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1113 16f28f9a-4ce2-e073-06de-1de4eb20be90
This commit is contained in:
fbarchard@google.com 2014-10-07 00:59:31 +00:00
parent 76301c9329
commit ca308327d2
14 changed files with 140 additions and 3334 deletions

View File

@ -655,13 +655,6 @@ void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
@ -740,16 +733,6 @@ void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width);
void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_u, uint8* dst_v, int width);
void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_u, uint8* dst_v, int width);
void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
@ -811,15 +794,11 @@ void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
void ARGBToUV444Row_SSSE3(const uint8* src_argb, void ARGBToUV444Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_SSSE3(const uint8* src_argb, void ARGBToUV422Row_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width);
void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
uint8* dst_u, uint8* dst_v, int width); uint8* dst_u, uint8* dst_v, int width);
@ -857,10 +836,6 @@ void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix); int pix);
void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix);
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int pix);
void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int pix); int pix);
void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
@ -878,8 +853,6 @@ void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width); int width);
void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width); int width);
void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
uint8* dst_uv, int width);
void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
int width); int width);
void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
@ -926,8 +899,6 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix); const uint8* shuffler, int pix);
void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix); const uint8* shuffler, int pix);
void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix);
void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
const uint8* shuffler, int pix); const uint8* shuffler, int pix);
void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
@ -994,7 +965,6 @@ void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
@ -1152,7 +1122,6 @@ void I422ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_v, const uint8* src_v,
uint8* dst_argb, uint8* dst_argb,
int width); int width);
// RGB24/RAW are unaligned.
void I422ToRGB24Row_SSSE3(const uint8* src_y, void I422ToRGB24Row_SSSE3(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
@ -1163,51 +1132,6 @@ void I422ToRAWRow_SSSE3(const uint8* src_y,
const uint8* src_v, const uint8* src_v,
uint8* dst_raw, uint8* dst_raw,
int width); int width);
void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int width);
void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
int width);
void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
int width);
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
int width);
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
int width);
void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
int width);
void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
int width);
void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
int width);
void I422ToARGBRow_Any_AVX2(const uint8* src_y, void I422ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
@ -1280,7 +1204,6 @@ void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_v, const uint8* src_v,
uint8* dst_rgba, uint8* dst_rgba,
int width); int width);
// RGB24/RAW are unaligned.
void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u, const uint8* src_u,
const uint8* src_v, const uint8* src_v,
@ -1494,12 +1417,6 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
uint8* dst_y, int pix);
void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
uint8* dst_u, uint8* dst_v, int pix);
void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
@ -1535,12 +1452,6 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
void UYVYToUV422Row_SSE2(const uint8* src_uyvy, void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);
void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
uint8* dst_y, int pix);
void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
uint8* dst_u, uint8* dst_v, int pix);
void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
uint8* dst_u, uint8* dst_v, int pix); uint8* dst_u, uint8* dst_v, int pix);

View File

@ -194,10 +194,7 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
} }
#endif #endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
IS_ALIGNED(src, 16) &&
IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
@ -291,14 +288,9 @@ static int X420ToI420(const uint8* src_y,
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
SplitUVRow = SplitUVRow_Any_SSE2; SplitUVRow = SplitUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
SplitUVRow = SplitUVRow_SSE2; SplitUVRow = SplitUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_SPLITUVROW_AVX2) #if defined(HAS_SPLITUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
@ -317,17 +309,15 @@ static int X420ToI420(const uint8* src_y,
} }
#endif #endif
#if defined(HAS_SPLITUVROW_MIPS_DSPR2) #if defined(HAS_SPLITUVROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16 &&
SplitUVRow = SplitUVRow_Any_MIPS_DSPR2; IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) { IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(halfwidth, 16)) {
SplitUVRow = SplitUVRow_MIPS_DSPR2; SplitUVRow = SplitUVRow_MIPS_DSPR2;
} }
} }
}
#endif #endif
if (dst_y) { if (dst_y) {
@ -440,9 +430,7 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
} }
#endif #endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif
@ -467,16 +455,10 @@ int Q420ToI420(const uint8* src_y, int src_stride_y,
YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2; YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
YUY2ToUV422Row = YUY2ToUV422Row_SSE2; YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
YUY2ToYRow = YUY2ToYRow_SSE2; YUY2ToYRow = YUY2ToYRow_SSE2;
} }
} }
}
}
#endif #endif
#if defined(HAS_YUY2TOYROW_AVX2) #if defined(HAS_YUY2TOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -543,16 +525,10 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
YUY2ToYRow = YUY2ToYRow_Any_SSE2; YUY2ToYRow = YUY2ToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
YUY2ToUVRow = YUY2ToUVRow_SSE2; YUY2ToUVRow = YUY2ToUVRow_SSE2;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
YUY2ToYRow = YUY2ToYRow_SSE2; YUY2ToYRow = YUY2ToYRow_SSE2;
} }
} }
}
}
#endif #endif
#if defined(HAS_YUY2TOYROW_AVX2) #if defined(HAS_YUY2TOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -616,16 +592,10 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
UYVYToUVRow = UYVYToUVRow_Any_SSE2; UYVYToUVRow = UYVYToUVRow_Any_SSE2;
UYVYToYRow = UYVYToYRow_Any_SSE2; UYVYToYRow = UYVYToYRow_Any_SSE2;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
UYVYToUVRow = UYVYToUVRow_SSE2; UYVYToUVRow = UYVYToUVRow_SSE2;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
UYVYToYRow = UYVYToYRow_SSE2; UYVYToYRow = UYVYToYRow_SSE2;
} }
} }
}
}
#endif #endif
#if defined(HAS_UYVYTOYROW_AVX2) #if defined(HAS_UYVYTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -694,16 +664,10 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
}
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -776,16 +740,10 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
BGRAToUVRow = BGRAToUVRow_Any_SSSE3; BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
BGRAToYRow = BGRAToYRow_Any_SSSE3; BGRAToYRow = BGRAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) {
BGRAToUVRow = BGRAToUVRow_SSSE3; BGRAToUVRow = BGRAToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
BGRAToYRow = BGRAToYRow_SSSE3; BGRAToYRow = BGRAToYRow_SSSE3;
} }
} }
}
}
#elif defined(HAS_BGRATOYROW_NEON) #elif defined(HAS_BGRATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
BGRAToYRow = BGRAToYRow_Any_NEON; BGRAToYRow = BGRAToYRow_Any_NEON;
@ -847,16 +805,10 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
ABGRToUVRow = ABGRToUVRow_Any_SSSE3; ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
ABGRToYRow = ABGRToYRow_Any_SSSE3; ABGRToYRow = ABGRToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) {
ABGRToUVRow = ABGRToUVRow_SSSE3; ABGRToUVRow = ABGRToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ABGRToYRow = ABGRToYRow_SSSE3; ABGRToYRow = ABGRToYRow_SSSE3;
} }
} }
}
}
#elif defined(HAS_ABGRTOYROW_NEON) #elif defined(HAS_ABGRTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ABGRToYRow = ABGRToYRow_Any_NEON; ABGRToYRow = ABGRToYRow_Any_NEON;
@ -918,16 +870,10 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
RGBAToUVRow = RGBAToUVRow_Any_SSSE3; RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
RGBAToYRow = RGBAToYRow_Any_SSSE3; RGBAToYRow = RGBAToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) {
RGBAToUVRow = RGBAToUVRow_SSSE3; RGBAToUVRow = RGBAToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
RGBAToYRow = RGBAToYRow_SSSE3; RGBAToYRow = RGBAToYRow_SSSE3;
} }
} }
}
}
#elif defined(HAS_RGBATOYROW_NEON) #elif defined(HAS_RGBATOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGBAToYRow = RGBAToYRow_Any_NEON; RGBAToYRow = RGBAToYRow_Any_NEON;
@ -1029,12 +975,9 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif // HAS_ARGBTOUVROW_SSSE3 #endif // HAS_ARGBTOUVROW_SSSE3
{ {
@ -1146,12 +1089,9 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif // HAS_ARGBTOUVROW_SSSE3 #endif // HAS_ARGBTOUVROW_SSSE3
{ {
@ -1260,12 +1200,9 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif // HAS_ARGBTOUVROW_SSSE3 #endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_RGB565TOYROW_NEON #endif // HAS_RGB565TOYROW_NEON
@ -1377,12 +1314,9 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif // HAS_ARGBTOUVROW_SSSE3 #endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_ARGB1555TOYROW_NEON #endif // HAS_ARGB1555TOYROW_NEON
@ -1495,12 +1429,9 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif // HAS_ARGBTOUVROW_SSSE3 #endif // HAS_ARGBTOUVROW_SSSE3
#endif // HAS_ARGB4444TOYROW_NEON #endif // HAS_ARGB4444TOYROW_NEON

View File

@ -82,12 +82,9 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I444ToARGBRow = I444ToARGBRow_Any_SSSE3; I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I444ToARGBRow = I444ToARGBRow_SSSE3; I444ToARGBRow = I444ToARGBRow_SSSE3;
} }
} }
}
#elif defined(HAS_I444TOARGBROW_NEON) #elif defined(HAS_I444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I444ToARGBRow = I444ToARGBRow_Any_NEON; I444ToARGBRow = I444ToARGBRow_Any_NEON;
@ -144,12 +141,9 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3; I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3; I422ToARGBRow = I422ToARGBRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
@ -224,12 +218,9 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I411ToARGBRow = I411ToARGBRow_Any_SSSE3; I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I411ToARGBRow = I411ToARGBRow_SSSE3; I411ToARGBRow = I411ToARGBRow_SSSE3;
} }
} }
}
#elif defined(HAS_I411TOARGBROW_NEON) #elif defined(HAS_I411TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I411ToARGBRow = I411ToARGBRow_Any_NEON; I411ToARGBRow = I411ToARGBRow_Any_NEON;
@ -276,8 +267,7 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
src_stride_y = dst_stride_argb = 0; src_stride_y = dst_stride_argb = 0;
} }
#if defined(HAS_YTOARGBROW_SSE2) #if defined(HAS_YTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
YToARGBRow = YToARGBRow_Any_SSE2; YToARGBRow = YToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
YToARGBRow = YToARGBRow_SSE2; YToARGBRow = YToARGBRow_SSE2;
@ -329,12 +319,9 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
I400ToARGBRow = I400ToARGBRow_Any_SSE2; I400ToARGBRow = I400ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I400ToARGBRow = I400ToARGBRow_SSE2; I400ToARGBRow = I400ToARGBRow_SSE2;
} }
} }
}
#elif defined(HAS_I400TOARGBROW_NEON) #elif defined(HAS_I400TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I400ToARGBRow = I400ToARGBRow_Any_NEON; I400ToARGBRow = I400ToARGBRow_Any_NEON;
@ -447,8 +434,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
src_stride_rgb24 = dst_stride_argb = 0; src_stride_rgb24 = dst_stride_argb = 0;
} }
#if defined(HAS_RGB24TOARGBROW_SSSE3) #if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
@ -497,8 +483,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
src_stride_raw = dst_stride_argb = 0; src_stride_raw = dst_stride_argb = 0;
} }
#if defined(HAS_RAWTOARGBROW_SSSE3) #if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3; RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3; RAWToARGBRow = RAWToARGBRow_SSSE3;
@ -547,8 +532,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
src_stride_rgb565 = dst_stride_argb = 0; src_stride_rgb565 = dst_stride_argb = 0;
} }
#if defined(HAS_RGB565TOARGBROW_SSE2) #if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2; RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
@ -597,8 +581,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
src_stride_argb1555 = dst_stride_argb = 0; src_stride_argb1555 = dst_stride_argb = 0;
} }
#if defined(HAS_ARGB1555TOARGBROW_SSE2) #if defined(HAS_ARGB1555TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
@ -647,8 +630,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
src_stride_argb4444 = dst_stride_argb = 0; src_stride_argb4444 = dst_stride_argb = 0;
} }
#if defined(HAS_ARGB4444TOARGBROW_SSE2) #if defined(HAS_ARGB4444TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
@ -696,12 +678,9 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
NV12ToARGBRow = NV12ToARGBRow_SSSE3; NV12ToARGBRow = NV12ToARGBRow_SSSE3;
} }
} }
}
#elif defined(HAS_NV12TOARGBROW_NEON) #elif defined(HAS_NV12TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON; NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
@ -747,12 +726,9 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3; NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
NV21ToARGBRow = NV21ToARGBRow_SSSE3; NV21ToARGBRow = NV21ToARGBRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_NV21TOARGBROW_NEON) #if defined(HAS_NV21TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
@ -798,12 +774,9 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
NV12ToARGBRow = NV12ToARGBRow_SSSE3; NV12ToARGBRow = NV12ToARGBRow_SSSE3;
} }
} }
}
#elif defined(HAS_NV12TOARGBROW_NEON) #elif defined(HAS_NV12TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON; NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
@ -856,13 +829,9 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
YUY2ToARGBRow = YUY2ToARGBRow_SSSE3; YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
} }
} }
}
#elif defined(HAS_YUY2TOARGBROW_NEON) #elif defined(HAS_YUY2TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON; YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
@ -909,13 +878,9 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3; UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
UYVYToARGBRow = UYVYToARGBRow_SSSE3; UYVYToARGBRow = UYVYToARGBRow_SSSE3;
} }
} }
}
#elif defined(HAS_UYVYTOARGBROW_NEON) #elif defined(HAS_UYVYTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
UYVYToARGBRow = UYVYToARGBRow_Any_NEON; UYVYToARGBRow = UYVYToARGBRow_Any_NEON;

View File

@ -400,14 +400,9 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_MERGEUVROW_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
@ -479,12 +474,9 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3; I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3; I422ToARGBRow = I422ToARGBRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_I422TOARGBROW_AVX2) #if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
@ -551,12 +543,9 @@ int I420ToBGRA(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToBGRARow = I422ToBGRARow_Any_SSSE3; I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
I422ToBGRARow = I422ToBGRARow_SSSE3; I422ToBGRARow = I422ToBGRARow_SSSE3;
} }
} }
}
#elif defined(HAS_I422TOBGRAROW_NEON) #elif defined(HAS_I422TOBGRAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToBGRARow = I422ToBGRARow_Any_NEON; I422ToBGRARow = I422ToBGRARow_Any_NEON;
@ -613,12 +602,9 @@ int I420ToABGR(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToABGRRow = I422ToABGRRow_Any_SSSE3; I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
I422ToABGRRow = I422ToABGRRow_SSSE3; I422ToABGRRow = I422ToABGRRow_SSSE3;
} }
} }
}
#elif defined(HAS_I422TOABGRROW_NEON) #elif defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToABGRRow = I422ToABGRRow_Any_NEON; I422ToABGRRow = I422ToABGRRow_Any_NEON;
@ -667,12 +653,9 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToRGBARow = I422ToRGBARow_Any_SSSE3; I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
I422ToRGBARow = I422ToRGBARow_SSSE3; I422ToRGBARow = I422ToRGBARow_SSSE3;
} }
} }
}
#elif defined(HAS_I422TORGBAROW_NEON) #elif defined(HAS_I422TORGBAROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToRGBARow = I422ToRGBARow_Any_NEON; I422ToRGBARow = I422ToRGBARow_Any_NEON;

View File

@ -54,12 +54,9 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV444Row = ARGBToUV444Row_SSSE3; ARGBToUV444Row = ARGBToUV444Row_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOUV444ROW_NEON) #elif defined(HAS_ARGBTOUV444ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToUV444Row = ARGBToUV444Row_Any_NEON; ARGBToUV444Row = ARGBToUV444Row_Any_NEON;
@ -72,13 +69,9 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
@ -133,12 +126,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3; ARGBToUV422Row = ARGBToUV422Row_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOUV422ROW_NEON) #elif defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON; ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
@ -152,13 +142,9 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
@ -212,13 +198,9 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -285,16 +267,10 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
}
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
@ -315,12 +291,9 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_MERGEUVROW_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
@ -392,16 +365,10 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
}
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
@ -422,12 +389,9 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
MergeUVRow_ = MergeUVRow_Any_SSE2; MergeUVRow_ = MergeUVRow_Any_SSE2;
if (IS_ALIGNED(halfwidth, 16)) { if (IS_ALIGNED(halfwidth, 16)) {
MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
MergeUVRow_ = MergeUVRow_SSE2; MergeUVRow_ = MergeUVRow_SSE2;
} }
} }
}
#endif #endif
#if defined(HAS_MERGEUVROW_AVX2) #if defined(HAS_MERGEUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
@ -503,12 +467,9 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3; ARGBToUV422Row = ARGBToUV422Row_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOUV422ROW_NEON) #elif defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON; ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
@ -521,12 +482,9 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
@ -605,12 +563,9 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUV422Row = ARGBToUV422Row_SSSE3; ARGBToUV422Row = ARGBToUV422Row_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOUV422ROW_NEON) #elif defined(HAS_ARGBTOUV422ROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 16) { if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
ARGBToUV422Row = ARGBToUV422Row_Any_NEON; ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
@ -623,12 +578,9 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) { if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON; ARGBToYRow = ARGBToYRow_Any_NEON;
@ -700,13 +652,9 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_ARGBTOYROW_AVX2) #if defined(HAS_ARGBTOYROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -867,8 +815,7 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_rgb565 = 0; src_stride_argb = dst_stride_rgb565 = 0;
} }
#if defined(HAS_ARGBTORGB565ROW_SSE2) #if defined(HAS_ARGBTORGB565ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToRGB565Row = ARGBToRGB565Row_SSE2; ARGBToRGB565Row = ARGBToRGB565Row_SSE2;
@ -915,8 +862,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb1555 = 0; src_stride_argb = dst_stride_argb1555 = 0;
} }
#if defined(HAS_ARGBTOARGB1555ROW_SSE2) #if defined(HAS_ARGBTOARGB1555ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2;
@ -963,8 +909,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
src_stride_argb = dst_stride_argb4444 = 0; src_stride_argb = dst_stride_argb4444 = 0;
} }
#if defined(HAS_ARGBTOARGB4444ROW_SSE2) #if defined(HAS_ARGBTOARGB4444ROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2;
if (IS_ALIGNED(width, 4)) { if (IS_ALIGNED(width, 4)) {
ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2;
@ -1015,16 +960,10 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
ARGBToYJRow = ARGBToYJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3;
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToUVJRow = ARGBToUVJRow_SSSE3;
if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3;
} }
} }
}
}
#endif #endif
#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) #if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
@ -1094,13 +1033,9 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) {
ARGBToYJRow = ARGBToYJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3;
} }
} }
}
#endif #endif
#if defined(HAS_ARGBTOYJROW_AVX2) #if defined(HAS_ARGBTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {

View File

@ -77,8 +77,7 @@ int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
src_stride_argb = -src_stride_argb; src_stride_argb = -src_stride_argb;
} }
#if defined(HAS_ARGBTOBAYERROW_SSSE3) #if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) { if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3; ARGBToBayerRow = ARGBToBayerRow_SSSE3;
@ -319,11 +318,8 @@ int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) { if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3;
} ARGBToUVRow = ARGBToUVRow_SSSE3;
} }
} }
#elif defined(HAS_ARGBTOYROW_NEON) #elif defined(HAS_ARGBTOYROW_NEON)

View File

@ -50,13 +50,11 @@ static void ARGBTranspose(const uint8* src, int src_stride,
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride, void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C; int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest. if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
} }
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest. if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest.
IS_ALIGNED(src, 4)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
} }
#endif #endif
@ -103,9 +101,7 @@ void ARGBRotate180(const uint8* src, int src_stride,
ARGBMirrorRow_C; ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3) #if defined(HAS_ARGBMIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3; ARGBMirrorRow = ARGBMirrorRow_SSSE3;
} }
#endif #endif
@ -130,9 +126,7 @@ void ARGBRotate180(const uint8* src, int src_stride,
} }
#endif #endif
#if defined(HAS_COPYROW_SSE2) #if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32)) {
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
CopyRow = CopyRow_SSE2; CopyRow = CopyRow_SSE2;
} }
#endif #endif

View File

@ -35,19 +35,19 @@ extern "C" {
} }
#ifdef HAS_I422TOARGBROW_SSSE3 #ifdef HAS_I422TOARGBROW_SSSE3
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, I422ToARGBRow_C,
1, 4, 7) 1, 4, 7)
#endif // HAS_I422TOARGBROW_SSSE3 #endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I444TOARGBROW_SSSE3 #ifdef HAS_I444TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, I444ToARGBRow_C,
0, 4, 7) 0, 4, 7)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, I411ToARGBRow_C,
2, 4, 7) 2, 4, 7)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, I422ToBGRARow_C,
1, 4, 7) 1, 4, 7)
YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, I422ToABGRRow_C,
1, 4, 7) 1, 4, 7)
YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, I422ToRGBARow_C,
1, 4, 7) 1, 4, 7)
// I422ToRGB565Row_SSSE3 is unaligned. // I422ToRGB565Row_SSSE3 is unaligned.
YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C, YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C,
@ -102,9 +102,9 @@ YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15)
} }
#ifdef HAS_NV12TOARGBROW_SSSE3 #ifdef HAS_NV12TOARGBROW_SSSE3
NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, NV12ToARGBRow_C,
0, 4) 0, 4)
NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, NV21ToARGBRow_C,
0, 4) 0, 4)
#endif // HAS_NV12TOARGBROW_SSSE3 #endif // HAS_NV12TOARGBROW_SSSE3
#ifdef HAS_NV12TOARGBROW_NEON #ifdef HAS_NV12TOARGBROW_NEON
@ -145,15 +145,15 @@ RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C,
3, 4, 2) 3, 4, 2)
#endif #endif
#if defined(HAS_I400TOARGBROW_SSE2) #if defined(HAS_I400TOARGBROW_SSE2)
RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C, RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, I400ToARGBRow_C,
7, 1, 4) 7, 1, 4)
#endif #endif
#if defined(HAS_YTOARGBROW_SSE2) #if defined(HAS_YTOARGBROW_SSE2)
RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C, RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C,
7, 1, 4) 7, 1, 4)
RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C, RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, YUY2ToARGBRow_C,
15, 2, 4) 15, 2, 4)
RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C, RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, UYVYToARGBRow_C,
15, 2, 4) 15, 2, 4)
// These require alignment on ARGB, so C is used for remainder. // These require alignment on ARGB, so C is used for remainder.
RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C, RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C,
@ -231,17 +231,17 @@ YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32)
YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32) YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32)
#endif #endif
#ifdef HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYROW_SSSE3
YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 4, 1, 16)
#endif #endif
#ifdef HAS_BGRATOYROW_SSSE3 #ifdef HAS_BGRATOYROW_SSSE3
YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 4, 1, 16)
YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 4, 1, 16)
YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16) YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 4, 1, 16)
YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16) YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 2, 1, 16)
YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16) YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 2, 1, 16)
#endif #endif
#ifdef HAS_ARGBTOYJROW_SSSE3 #ifdef HAS_ARGBTOYJROW_SSSE3
YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16) YANY(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 4, 1, 16)
#endif #endif
#ifdef HAS_ARGBTOYROW_NEON #ifdef HAS_ARGBTOYROW_NEON
YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8) YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8)
@ -349,14 +349,14 @@ UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31)
UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31) UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31)
#endif #endif
#ifdef HAS_ARGBTOUVROW_SSSE3 #ifdef HAS_ARGBTOUVROW_SSSE3
UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15) UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, ARGBToUVRow_C, 4, 15)
UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C, UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, ARGBToUVJRow_C,
4, 15) 4, 15)
UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15) UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, BGRAToUVRow_C, 4, 15)
UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15) UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, ABGRToUVRow_C, 4, 15)
UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15) UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, RGBAToUVRow_C, 4, 15)
UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, YUY2ToUVRow_C, 2, 15) UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, YUY2ToUVRow_C, 2, 15)
UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15) UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, UYVYToUVRow_C, 2, 15)
#endif #endif
#ifdef HAS_ARGBTOUVROW_NEON #ifdef HAS_ARGBTOUVROW_NEON
UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15) UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15)
@ -408,7 +408,7 @@ UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15)
} }
#ifdef HAS_ARGBTOUV444ROW_SSSE3 #ifdef HAS_ARGBTOUV444ROW_SSSE3
UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3, UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3,
ARGBToUV444Row_C, 4, 15, 0) ARGBToUV444Row_C, 4, 15, 0)
#endif #endif
#ifdef HAS_YUY2TOUV422ROW_AVX2 #ifdef HAS_YUY2TOUV422ROW_AVX2
@ -418,11 +418,11 @@ UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2,
UYVYToUV422Row_C, 2, 31, 1) UYVYToUV422Row_C, 2, 31, 1)
#endif #endif
#ifdef HAS_ARGBTOUVROW_SSSE3 #ifdef HAS_ARGBTOUVROW_SSSE3
UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3, UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_SSSE3,
ARGBToUV422Row_C, 4, 15, 1) ARGBToUV422Row_C, 4, 15, 1)
UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2,
YUY2ToUV422Row_C, 2, 15, 1) YUY2ToUV422Row_C, 2, 15, 1)
UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2, UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2,
UYVYToUV422Row_C, 2, 15, 1) UYVYToUV422Row_C, 2, 15, 1)
#endif #endif
#ifdef HAS_YUY2TOUV422ROW_NEON #ifdef HAS_YUY2TOUV422ROW_NEON
@ -451,7 +451,7 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
} }
#ifdef HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_SSE2
SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15) SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, SplitUVRow_C, 15)
#endif #endif
#ifdef HAS_SPLITUVROW_AVX2 #ifdef HAS_SPLITUVROW_AVX2
SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31) SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
@ -460,7 +460,7 @@ SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31)
SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15) SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15)
#endif #endif
#ifdef HAS_SPLITUVROW_MIPS_DSPR2 #ifdef HAS_SPLITUVROW_MIPS_DSPR2
SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2, SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_MIPS_DSPR2,
SplitUVRow_C, 15) SplitUVRow_C, 15)
#endif #endif
#undef SPLITUVROWANY #undef SPLITUVROWANY
@ -477,7 +477,7 @@ SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2,
} }
#ifdef HAS_MERGEUVROW_SSE2 #ifdef HAS_MERGEUVROW_SSE2
MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15) MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, MergeUVRow_C, 15)
#endif #endif
#ifdef HAS_MERGEUVROW_AVX2 #ifdef HAS_MERGEUVROW_AVX2
MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31) MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31)
@ -548,7 +548,7 @@ YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2,
ARGBShuffleRow_C, 4, 4, 3) ARGBShuffleRow_C, 4, 4, 3)
#endif #endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3 #ifdef HAS_ARGBSHUFFLEROW_SSSE3
YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3, YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3,
ARGBShuffleRow_C, 4, 4, 7) ARGBShuffleRow_C, 4, 4, 7)
#endif #endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2 #ifdef HAS_ARGBSHUFFLEROW_AVX2

View File

@ -2137,19 +2137,6 @@ void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
free_aligned_buffer_64(row_y); free_aligned_buffer_64(row_y);
} }
void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
int width) {
// Allocate a rows of yuv.
align_buffer_64(row_y, ((width + 63) & ~63) * 2);
uint8* row_u = row_y + ((width + 63) & ~63);
uint8* row_v = row_u + ((width + 63) & ~63) / 2;
YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width);
YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width);
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
free_aligned_buffer_64(row_y);
}
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
uint8* dst_argb, uint8* dst_argb,
int width) { int width) {
@ -2163,19 +2150,6 @@ void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
free_aligned_buffer_64(row_y); free_aligned_buffer_64(row_y);
} }
void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
int width) {
// Allocate a rows of yuv.
align_buffer_64(row_y, ((width + 63) & ~63) * 2);
uint8* row_u = row_y + ((width + 63) & ~63);
uint8* row_v = row_u + ((width + 63) & ~63) / 2;
UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width);
UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width);
I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width);
free_aligned_buffer_64(row_y);
}
#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) #endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)
#endif // !defined(LIBYUV_DISABLE_X86) #endif // !defined(LIBYUV_DISABLE_X86)

View File

@ -447,89 +447,6 @@ void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
); );
} }
void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
uint8* dst_v, int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"srl $t4, %[width], 4 \n" // multiplies of 16
"blez $t4, 2f \n"
" andi %[width], %[width], 0xf \n" // residual
".p2align 2 \n"
"1: \n"
"addiu $t4, $t4, -1 \n"
"lwr $t0, 0(%[src_uv]) \n"
"lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
"lwr $t1, 4(%[src_uv]) \n"
"lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
"lwr $t2, 8(%[src_uv]) \n"
"lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
"lwr $t3, 12(%[src_uv]) \n"
"lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
"lwr $t5, 16(%[src_uv]) \n"
"lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
"lwr $t6, 20(%[src_uv]) \n"
"lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
"lwr $t7, 24(%[src_uv]) \n"
"lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
"lwr $t8, 28(%[src_uv]) \n"
"lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
"precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
"precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
"precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
"precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
"precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
"precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
"precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
"precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
"addiu %[src_uv], %[src_uv], 32 \n"
"swr $t9, 0(%[dst_v]) \n"
"swl $t9, 3(%[dst_v]) \n"
"swr $t0, 0(%[dst_u]) \n"
"swl $t0, 3(%[dst_u]) \n"
"swr $t1, 4(%[dst_v]) \n"
"swl $t1, 7(%[dst_v]) \n"
"swr $t2, 4(%[dst_u]) \n"
"swl $t2, 7(%[dst_u]) \n"
"swr $t3, 8(%[dst_v]) \n"
"swl $t3, 11(%[dst_v]) \n"
"swr $t5, 8(%[dst_u]) \n"
"swl $t5, 11(%[dst_u]) \n"
"swr $t6, 12(%[dst_v]) \n"
"swl $t6, 15(%[dst_v]) \n"
"swr $t7, 12(%[dst_u]) \n"
"swl $t7, 15(%[dst_u]) \n"
"addiu %[dst_u], %[dst_u], 16 \n"
"bgtz $t4, 1b \n"
" addiu %[dst_v], %[dst_v], 16 \n"
"beqz %[width], 3f \n"
" nop \n"
"2: \n"
"lbu $t0, 0(%[src_uv]) \n"
"lbu $t1, 1(%[src_uv]) \n"
"addiu %[src_uv], %[src_uv], 2 \n"
"addiu %[width], %[width], -1 \n"
"sb $t0, 0(%[dst_u]) \n"
"sb $t1, 0(%[dst_v]) \n"
"addiu %[dst_u], %[dst_u], 1 \n"
"bgtz %[width], 2b \n"
" addiu %[dst_v], %[dst_v], 1 \n"
"3: \n"
".set pop \n"
: [src_uv] "+r" (src_uv),
[width] "+r" (width),
[dst_u] "+r" (dst_u),
[dst_v] "+r" (dst_v)
:
: "t0", "t1", "t2", "t3",
"t4", "t5", "t6", "t7", "t8", "t9"
);
}
void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
__asm__ __volatile__ ( __asm__ __volatile__ (
".set push \n" ".set push \n"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -154,9 +154,7 @@ static void ScalePlaneDown4(int src_width, int src_height,
ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
} }
#elif defined(HAS_SCALEROWDOWN4_SSE2) #elif defined(HAS_SCALEROWDOWN4_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
} }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) #elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
@ -198,9 +196,7 @@ static void ScalePlaneDown4_16(int src_width, int src_height,
ScaleRowDown4_16_NEON; ScaleRowDown4_16_NEON;
} }
#elif defined(HAS_SCALEROWDOWN4_16_SSE2) #elif defined(HAS_SCALEROWDOWN4_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
ScaleRowDown4_16_SSE2; ScaleRowDown4_16_SSE2;
} }
@ -256,8 +252,7 @@ static void ScalePlaneDown34(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN34_SSSE3) #if defined(HAS_SCALEROWDOWN34_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_SSSE3; ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
@ -336,8 +331,7 @@ static void ScalePlaneDown34_16(int src_width, int src_height,
} }
#endif #endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3) #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
@ -431,8 +425,7 @@ static void ScalePlaneDown38(int src_width, int src_height,
} }
} }
#elif defined(HAS_SCALEROWDOWN38_SSSE3) #elif defined(HAS_SCALEROWDOWN38_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
@ -508,8 +501,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
} }
} }
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3) #elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
if (!filtering) { if (!filtering) {
ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
@ -743,11 +735,11 @@ static void ScalePlaneBox(int src_width, int src_height,
uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
#if defined(HAS_SCALEADDROWS_SSE2) #if defined(HAS_SCALEADDROWS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2)
#ifdef AVOID_OVERREAD #ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) && && IS_ALIGNED(src_width, 16)
#endif #endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { ) {
ScaleAddRows = ScaleAddRows_SSE2; ScaleAddRows = ScaleAddRows_SSE2;
} }
#endif #endif
@ -815,11 +807,11 @@ static void ScalePlaneBox_16(int src_width, int src_height,
uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C; uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
#if defined(HAS_SCALEADDROWS_16_SSE2) #if defined(HAS_SCALEADDROWS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && if (TestCpuFlag(kCpuHasSSE2)
#ifdef AVOID_OVERREAD #ifdef AVOID_OVERREAD
IS_ALIGNED(src_width, 16) && && IS_ALIGNED(src_width, 16)
#endif #endif
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { ) {
ScaleAddRows = ScaleAddRows_16_SSE2; ScaleAddRows = ScaleAddRows_16_SSE2;
} }
#endif #endif
@ -1111,9 +1103,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C; ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2) #if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleFilterCols = ScaleColsUp2_SSE2; ScaleFilterCols = ScaleColsUp2_SSE2;
} }
#endif #endif
@ -1244,9 +1234,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_16_C; ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2) #if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleFilterCols = ScaleColsUp2_16_SSE2; ScaleFilterCols = ScaleColsUp2_16_SSE2;
} }
#endif #endif
@ -1327,9 +1315,7 @@ static void ScalePlaneSimple(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_C; ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2) #if defined(HAS_SCALECOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleCols = ScaleColsUp2_SSE2; ScaleCols = ScaleColsUp2_SSE2;
} }
#endif #endif
@ -1362,9 +1348,7 @@ static void ScalePlaneSimple_16(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleCols = ScaleColsUp2_16_C; ScaleCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2) #if defined(HAS_SCALECOLS_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleCols = ScaleColsUp2_16_SSE2; ScaleCols = ScaleColsUp2_16_SSE2;
} }
#endif #endif

View File

@ -53,16 +53,13 @@ static void ScaleARGBDown2(int src_width, int src_height,
} }
#if defined(HAS_SCALEARGBROWDOWN2_SSE2) #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2); ScaleARGBRowDown2Box_SSE2);
} }
#elif defined(HAS_SCALEARGBROWDOWN2_NEON) #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON : ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
ScaleARGBRowDown2_NEON; ScaleARGBRowDown2_NEON;
} }
@ -98,14 +95,11 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
assert(dx == 65536 * 4); // Test scale factor of 4. assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2) #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
} }
#elif defined(HAS_SCALEARGBROWDOWN2_NEON) #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
} }
#endif #endif
@ -139,14 +133,12 @@ static void ScaleARGBDownEven(int src_width, int src_height,
assert(IS_ALIGNED(src_height, 2)); assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4; src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4)) {
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2; ScaleARGBRowDownEven_SSE2;
} }
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) && if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4)) {
IS_ALIGNED(src_argb, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON; ScaleARGBRowDownEven_NEON;
} }
@ -334,9 +326,7 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C; ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2) #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
} }
#endif #endif
@ -510,9 +500,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C; ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2) #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
} }
#endif #endif
@ -619,9 +607,7 @@ static void ScaleARGBSimple(int src_width, int src_height,
if (src_width * 2 == dst_width && x < 0x8000) { if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C; ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2) #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2; ScaleARGBCols = ScaleARGBColsUp2_SSE2;
} }
#endif #endif