From f08ac6bb095348565b5259f2fab95f259ef47ede Mon Sep 17 00:00:00 2001
From: "fbarchard@google.com"
Date: Thu, 15 Nov 2012 00:21:14 +0000
Subject: [PATCH] Rename row functions so they are all SomethingRow_CPU

BUG=133
TEST=still builds
Review URL: https://webrtc-codereview.appspot.com/939020

git-svn-id: http://libyuv.googlecode.com/svn/trunk@491 16f28f9a-4ce2-e073-06de-1de4eb20be90
---
 README.chromium             |   2 +-
 include/libyuv/row.h        | 132 ++++++++++++++++++------------------
 include/libyuv/version.h    |   2 +-
 source/convert.cc           |  38 +++++------
 source/convert_from.cc      |  30 ++++----
 source/convert_from_argb.cc |  64 ++++++++---------
 source/planar_functions.cc  |  30 ++++----
 source/rotate.cc            |   2 +-
 source/row_any.cc           |  36 +++++-----
 source/row_common.cc        |  14 ++--
 source/row_mips.cc          |  18 +++--
 source/row_neon.cc          |  27 ++++----
 source/row_posix.cc         |  39 +++++------
 source/row_win.cc           |  39 +++++------
 source/row_x86.asm          |  38 +++++------
 15 files changed, 257 insertions(+), 254 deletions(-)

diff --git a/README.chromium b/README.chromium
index dda2eaa01..4adf2267b 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 490
+Version: 491
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index c6ed6ec6b..d5cb0f100 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -71,9 +71,9 @@ extern "C" {
 #define HAS_I422TOUYVYROW_SSE2
 #define HAS_I422TOYUY2ROW_SSE2
 #define HAS_I444TOARGBROW_SSSE3
-#define HAS_MERGEUV_SSE2
+#define HAS_MERGEUVROW_SSE2
 #define HAS_MIRRORROW_SSSE3
-#define HAS_MirrorUVRow_SSSE3
+#define HAS_MIRRORUVROW_SSSE3
 #define HAS_NV12TOARGBROW_SSSE3
 #define HAS_NV12TORGB565ROW_SSSE3
 #define HAS_NV21TOARGBROW_SSSE3
@@ -87,7 +87,7 @@ extern "C" {
 #define HAS_RGBATOUVROW_SSSE3
 #define HAS_RGBATOYROW_SSSE3
 #define HAS_SETROW_X86
-#define HAS_SPLITUV_SSE2
+#define HAS_SPLITUVROW_SSE2
 #define HAS_UYVYTOARGBROW_SSSE3
 #define HAS_UYVYTOUV422ROW_SSE2
 #define HAS_UYVYTOUVROW_SSE2
@@ -108,10 +108,10 @@ extern "C" {
 #define HAS_ARGBMIRRORROW_SSSE3
 #define HAS_ARGBQUANTIZEROW_SSE2
 #define HAS_ARGBSEPIAROW_SSSE3
-#define HAS_ARGBSHADE_SSE2
+#define HAS_ARGBSHADEROW_SSE2
 #define HAS_ARGBUNATTENUATEROW_SSE2
 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
-#define HAS_CUMULATIVESUMTOAVERAGE_SSE2
+#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
 #endif
 
 // The following are Windows only.
@@ -125,14 +125,14 @@ extern "C" {
 #if !defined(YUV_DISABLE_ASM) && defined(HAVE_YASM) && \
     (defined(_M_IX86) || defined(_M_X64) || \
     defined(__x86_64__) || defined(__i386__))
-#define HAS_SPLITUV_AVX2
-#define HAS_SPLITUV_MMX
+#define HAS_SPLITUVROW_AVX2
+#define HAS_SPLITUVROW_MMX
 #define HAS_YUY2TOYROW_AVX2
 #define HAS_UYVYTOYROW_AVX2
 #define HAS_YUY2TOYROW_MMX
 #define HAS_UYVYTOYROW_MMX
-#define HAS_MERGEUV_AVX2
-#define HAS_MERGEUV_MMX
+#define HAS_MERGEUVROW_AVX2
+#define HAS_MERGEUVROW_MMX
 #endif
 
 // The following are disabled when SSSE3 is available:
@@ -186,7 +186,7 @@ extern "C" {
 #define HAS_I422TOUYVYROW_NEON
 #define HAS_I422TOYUY2ROW_NEON
 #define HAS_I444TOARGBROW_NEON
-#define HAS_MERGEUV_NEON
+#define HAS_MERGEUVROW_NEON
 #define HAS_MIRRORROW_NEON
 #define HAS_MIRRORUVROW_NEON
 #define HAS_NV12TOARGBROW_NEON
@@ -206,7 +206,7 @@ extern "C" {
 #define HAS_RGBATOUVROW_NEON
 #define HAS_RGBATOYROW_NEON
 #define HAS_SETROW_NEON
-#define HAS_SPLITUV_NEON
+#define HAS_SPLITUVROW_NEON
 #define HAS_UYVYTOARGBROW_NEON
 #define HAS_UYVYTOUV422ROW_NEON
 #define HAS_UYVYTOUVROW_NEON
@@ -223,9 +223,9 @@ extern "C" {
 #if !defined(YUV_DISABLE_ASM) && defined(__mips__)
 #define HAS_COPYROW_MIPS
 #if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_SPLITUV_MIPS_DSPR2
+#define HAS_SPLITUVROW_MIPS_DSPR2
 #define HAS_MIRRORROW_MIPS_DSPR2
-#define HAS_MirrorUVRow_MIPS_DSPR2
+#define HAS_MIRRORUVROW_MIPS_DSPR2
 #define HAS_I422TOARGBROW_MIPS_DSPR2
 #define HAS_I422TOBGRAROW_MIPS_DSPR2
 #define HAS_I422TOABGRROW_MIPS_DSPR2
@@ -514,46 +514,49 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
 
-void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                        int pix);
-void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int pix);
-void SplitUV_Unaligned_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int pix);
-void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int pix);
-void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
-                                  uint8* dst_v, int pix);
-void SplitUV_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
-void SplitUV_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int pix);
+void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
+void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                           int pix);
+void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix);
+void SplitUVRow_Unaligned_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix);
+void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix);
+void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
+                                     uint8* dst_v, int pix);
+void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                         int pix);
+void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix);
 
-void MergeUV_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-               int width);
-void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                   int width);
-void MergeUV_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width);
-void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width);
-void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
-                            uint8* dst_uv, int width);
-void MergeUV_Unaligned_AVX2(const uint8* src_u, const uint8* src_v,
-                            uint8* dst_uv, int width);
-void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
-                            uint8* dst_uv, int width);
-void MergeUV_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                      int width);
-void MergeUV_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                      int width);
-void MergeUV_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                      int width);
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width);
+void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width);
+void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width);
+void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width);
+void MergeUVRow_Unaligned_AVX2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width);
+void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width);
+void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                         int width);
+void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                         int width);
+void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                         int width);
 
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
 void CopyRow_X86(const uint8* src, uint8* dst, int count);
@@ -561,14 +564,15 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count);
 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
 void CopyRow_C(const uint8* src, uint8* dst, int count);
 
-void SetRow8_X86(uint8* dst, uint32 v32, int count);
-void SetRows32_X86(uint8* dst, uint32 v32, int width,
-                   int dst_stride, int height);
-void SetRow8_NEON(uint8* dst, uint32 v32, int count);
-void SetRows32_NEON(uint8* dst, uint32 v32, int width,
-                    int dst_stride, int height);
-void SetRow8_C(uint8* dst, uint32 v32, int count);
-void SetRows32_C(uint8* dst, uint32 v32, int width, int dst_stride, int height);
+void SetRow_X86(uint8* dst, uint32 v32, int count);
+void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
+                     int dst_stride, int height);
+void SetRow_NEON(uint8* dst, uint32 v32, int count);
+void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
+                      int dst_stride, int height);
+void SetRow_C(uint8* dst, uint32 v32, int count);
+void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
+                   int height);
 
 void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix);
 void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix);
@@ -615,8 +619,6 @@ void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
                                 int pix);
 
-
-
 void ARGBToRGBARow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
@@ -1215,13 +1217,13 @@
 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
                           int interval_offset, int width);
 
 // Used for blur.
-void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
-                                 int width, int area, uint8* dst, int count);
+void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
+                                    int width, int area, uint8* dst, int count);
 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
                                   const int32* previous_cumsum, int width);
-void CumulativeSumToAverage_C(const int32* topleft, const int32* botleft,
-                              int width, int area, uint8* dst, int count);
+void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
+                                 int width, int area, uint8* dst, int count);
 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
                                const int32* previous_cumsum, int width);
 
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index 5b3055692..be61d7a04 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 490
+#define LIBYUV_VERSION 491
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert.cc b/source/convert.cc
index dcadcb6a0..ca6f147f4 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -365,56 +365,56 @@ static int X420ToI420(const uint8* src_y,
   }
 
   int halfwidth = (width + 1) >> 1;
-  void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUV_C;
-#if defined(HAS_SPLITUV_SSE2)
+  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
+      SplitUVRow_C;
+#if defined(HAS_SPLITUVROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    SplitUV = SplitUV_Any_SSE2;
+    SplitUVRow = SplitUVRow_Any_SSE2;
     if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUV = SplitUV_Unaligned_SSE2;
+      SplitUVRow = SplitUVRow_Unaligned_SSE2;
       if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
           IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
          IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-        SplitUV = SplitUV_SSE2;
+        SplitUVRow = SplitUVRow_SSE2;
      }
    }
  }
 #endif
-#if defined(HAS_SPLITUV_AVX2)
+#if defined(HAS_SPLITUVROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    SplitUV = SplitUV_Any_AVX2;
+    SplitUVRow = SplitUVRow_Any_AVX2;
     if (IS_ALIGNED(halfwidth, 32)) {
-      SplitUV = SplitUV_Unaligned_AVX2;
+      SplitUVRow = SplitUVRow_Unaligned_AVX2;
       if (IS_ALIGNED(src_uv, 32) && IS_ALIGNED(src_stride_uv, 32) &&
           IS_ALIGNED(dst_u, 32) && IS_ALIGNED(dst_stride_u, 32) &&
          IS_ALIGNED(dst_v, 32) && IS_ALIGNED(dst_stride_v, 32)) {
-        SplitUV = SplitUV_AVX2;
+        SplitUVRow = SplitUVRow_AVX2;
      }
    }
  }
 #endif
-#if defined(HAS_SPLITUV_NEON)
+#if defined(HAS_SPLITUVROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    SplitUV = SplitUV_Any_NEON;
+    SplitUVRow = SplitUVRow_Any_NEON;
     if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUV = SplitUV_Unaligned_NEON;
+      SplitUVRow = SplitUVRow_Unaligned_NEON;
       if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
           IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
          IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
-        SplitUV = SplitUV_NEON;
+        SplitUVRow = SplitUVRow_NEON;
      }
    }
  }
 #endif
-#if defined(HAS_SPLITUV_MIPS_DSPR2)
+#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
-    SplitUV = SplitUV_Any_MIPS_DSPR2;
+    SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
     if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUV = SplitUV_Unaligned_MIPS_DSPR2;
+      SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
       if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
           IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
          IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
-        SplitUV = SplitUV_MIPS_DSPR2;
+        SplitUVRow = SplitUVRow_MIPS_DSPR2;
      }
    }
  }
@@ -428,7 +428,7 @@ static int X420ToI420(const uint8* src_y,
   int halfheight = (height + 1) >> 1;
   for (int y = 0; y < halfheight; ++y) {
     // Copy a row of UV.
-    SplitUV(src_uv, dst_u, dst_v, halfwidth);
+    SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
     dst_u += dst_stride_u;
     dst_v += dst_stride_v;
     src_uv += src_stride_uv;
diff --git a/source/convert_from.cc b/source/convert_from.cc
index ff1fb315f..9b3d3ec96 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -519,43 +519,43 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
   }
 
   int halfwidth = (width + 1) >> 1;
-  void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) = MergeUV_C;
-#if defined(HAS_MERGEUV_SSE2)
+  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                      int width) = MergeUVRow_C;
+#if defined(HAS_MERGEUVROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    MergeUV = MergeUV_Any_SSE2;
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
     if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUV = MergeUV_Unaligned_SSE2;
+      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
       if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
           IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
          IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUV = MergeUV_SSE2;
+        MergeUVRow_ = MergeUVRow_SSE2;
      }
    }
  }
 #endif
-#if defined(HAS_MERGEUV_AVX2)
+#if defined(HAS_MERGEUVROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    MergeUV = MergeUV_Any_AVX2;
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
     if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUV = MergeUV_Unaligned_AVX2;
+      MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
       if (IS_ALIGNED(src_u, 32) && IS_ALIGNED(src_stride_u, 32) &&
           IS_ALIGNED(src_v, 32) && IS_ALIGNED(src_stride_v, 32) &&
          IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
-        MergeUV = MergeUV_AVX2;
+        MergeUVRow_ = MergeUVRow_AVX2;
      }
    }
  }
 #endif
-#if defined(HAS_MERGEUV_NEON)
+#if defined(HAS_MERGEUVROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    MergeUV = MergeUV_Any_NEON;
+    MergeUVRow_ = MergeUVRow_Any_NEON;
     if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUV = MergeUV_Unaligned_NEON;
+      MergeUVRow_ = MergeUVRow_Unaligned_NEON;
       if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) &&
           IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) &&
          IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUV = MergeUV_NEON;
+        MergeUVRow_ = MergeUVRow_NEON;
      }
    }
  }
@@ -565,7 +565,7 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
   int halfheight = (height + 1) >> 1;
   for (int y = 0; y < halfheight; ++y) {
     // Merge a row of U and V into a row of UV.
-    MergeUV(src_u, src_v, dst_uv, halfwidth);
+    MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
     src_u += src_stride_u;
     src_v += src_stride_v;
     dst_uv += dst_stride_uv;
diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc
index da8c83ede..0ba659d7f 100644
--- a/source/convert_from_argb.cc
+++ b/source/convert_from_argb.cc
@@ -248,37 +248,37 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
   }
 #endif
   int halfwidth = (width + 1) >> 1;
-  void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) = MergeUV_C;
-#if defined(HAS_MERGEUV_SSE2)
+  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                      int width) = MergeUVRow_C;
+#if defined(HAS_MERGEUVROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    MergeUV = MergeUV_Any_SSE2;
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
     if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUV = MergeUV_Unaligned_SSE2;
+      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
       if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUV = MergeUV_SSE2;
+        MergeUVRow_ = MergeUVRow_SSE2;
      }
    }
  }
 #endif
-#if defined(HAS_MERGEUV_AVX2)
+#if defined(HAS_MERGEUVROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    MergeUV = MergeUV_Any_AVX2;
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
     if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUV = MergeUV_Unaligned_AVX2;
+      MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
       if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
-        MergeUV = MergeUV_AVX2;
+        MergeUVRow_ = MergeUVRow_AVX2;
      }
    }
  }
 #endif
-#if defined(HAS_MERGEUV_NEON)
+#if defined(HAS_MERGEUVROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    MergeUV = MergeUV_Any_NEON;
+    MergeUVRow_ = MergeUVRow_Any_NEON;
     if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUV = MergeUV_Unaligned_NEON;
+      MergeUVRow_ = MergeUVRow_Unaligned_NEON;
       if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUV = MergeUV_NEON;
+        MergeUVRow_ = MergeUVRow_NEON;
      }
    }
  }
@@ -289,7 +289,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
 
   for (int y = 0; y < height - 1; y += 2) {
     ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
-    MergeUV(row_u, row_v, dst_uv, halfwidth);
+    MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
     ARGBToYRow(src_argb, dst_y, width);
     ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
     src_argb += src_stride_argb * 2;
@@ -298,7 +298,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
   }
   if (height & 1) {
     ARGBToUVRow(src_argb, 0, row_u, row_v, width);
-    MergeUV(row_u, row_v, dst_uv, halfwidth);
+    MergeUVRow_(row_u, row_v, dst_uv, halfwidth);
     ARGBToYRow(src_argb, dst_y, width);
     ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width);
   }
@@ -357,37 +357,37 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
   }
 #endif
   int halfwidth = (width + 1) >> 1;
-  void (*MergeUV)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) = MergeUV_C;
-#if defined(HAS_MERGEUV_SSE2)
+  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                      int width) = MergeUVRow_C;
+#if defined(HAS_MERGEUVROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
-    MergeUV = MergeUV_Any_SSE2;
+    MergeUVRow_ = MergeUVRow_Any_SSE2;
     if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUV = MergeUV_Unaligned_SSE2;
+      MergeUVRow_ = MergeUVRow_Unaligned_SSE2;
       if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUV = MergeUV_SSE2;
+        MergeUVRow_ = MergeUVRow_SSE2;
      }
    }
  }
 #endif
-#if defined(HAS_MERGEUV_AVX2)
+#if defined(HAS_MERGEUVROW_AVX2)
   if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
-    MergeUV = MergeUV_Any_AVX2;
+    MergeUVRow_ = MergeUVRow_Any_AVX2;
     if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUV = MergeUV_Unaligned_AVX2;
+      MergeUVRow_ = MergeUVRow_Unaligned_AVX2;
       if (IS_ALIGNED(dst_uv, 32) && IS_ALIGNED(dst_stride_uv, 32)) {
-        MergeUV = MergeUV_AVX2;
+        MergeUVRow_ = MergeUVRow_AVX2;
      }
    }
  }
 #endif
-#if defined(HAS_MERGEUV_NEON)
+#if defined(HAS_MERGEUVROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
-    MergeUV = MergeUV_Any_NEON;
+    MergeUVRow_ = MergeUVRow_Any_NEON;
     if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUV = MergeUV_Unaligned_NEON;
+      MergeUVRow_ = MergeUVRow_Unaligned_NEON;
       if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) {
-        MergeUV = MergeUV_NEON;
+        MergeUVRow_ = MergeUVRow_NEON;
      }
    }
  }
@@ -398,7 +398,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
 
   for (int y = 0; y < height - 1; y += 2) {
     ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width);
-    MergeUV(row_v, row_u, dst_uv, halfwidth);
+    MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
     ARGBToYRow(src_argb, dst_y, width);
     ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width);
     src_argb += src_stride_argb * 2;
@@ -407,7 +407,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
   }
   if (height & 1) {
     ARGBToUVRow(src_argb, 0, row_u, row_v, width);
-    MergeUV(row_v, row_u, dst_uv, halfwidth);
+    MergeUVRow_(row_v, row_u, dst_uv, halfwidth);
     ARGBToYRow(src_argb, dst_y, width);
     ARGBToYRow(src_argb + 0, dst_y + dst_stride_y, width);
   }
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 96be20234..3177ef710 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -656,17 +656,17 @@ LIBYUV_API
 void SetPlane(uint8* dst_y, int dst_stride_y,
               int width, int height,
               uint32 value) {
-  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow8_C;
+  void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
 #if defined(HAS_SETROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) &&
       IS_ALIGNED(width, 16) &&
       IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
-    SetRow = SetRow8_NEON;
+    SetRow = SetRow_NEON;
   }
 #endif
 #if defined(HAS_SETROW_X86)
   if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
-    SetRow = SetRow8_X86;
+    SetRow = SetRow_X86;
   }
 #endif
 
@@ -721,17 +721,17 @@ int ARGBRect(uint8* dst_argb, int dst_stride_argb,
 
 #if defined(HAS_SETROW_NEON)
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
-    SetRows32_NEON(dst, value, width, dst_stride_argb, height);
+    ARGBSetRows_NEON(dst, value, width, dst_stride_argb, height);
     return 0;
   }
 #endif
 #if defined(HAS_SETROW_X86)
   if (TestCpuFlag(kCpuHasX86)) {
-    SetRows32_X86(dst, value, width, dst_stride_argb, height);
+    ARGBSetRows_X86(dst, value, width, dst_stride_argb, height);
     return 0;
   }
 #endif
-  SetRows32_C(dst, value, width, dst_stride_argb, height);
+  ARGBSetRows_C(dst, value, width, dst_stride_argb, height);
   return 0;
 }
@@ -985,7 +985,7 @@ int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
   }
   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
-#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
   }
@@ -1015,12 +1015,12 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
   }
   void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
       const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
-  void (*CumulativeSumToAverage)(const int32* topleft, const int32* botleft,
-      int width, int area, uint8* dst, int count) = CumulativeSumToAverage_C;
-#if defined(HAS_CUMULATIVESUMTOAVERAGE_SSE2)
+  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
+      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
+#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
-    CumulativeSumToAverage = CumulativeSumToAverage_SSE2;
+    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
   }
 #endif
   // Compute enough CumulativeSum for first row to be blurred. After this
@@ -1065,7 +1065,7 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
     int boxwidth = radius * 4;
     int x;
     for (x = 0; x < radius + 1; ++x) {
-      CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
+      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                              boxwidth, area, &dst_argb[x * 4], 1);
       area += (bot_y - top_y);
       boxwidth += 4;
@@ -1073,14 +1073,14 @@ int ARGBBlur(const uint8* src_argb, int src_stride_argb,
 
     // Middle unclipped.
     int n = (width - 1) - radius - x + 1;
-    CumulativeSumToAverage(cumsum_top_row, cumsum_bot_row,
+    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                            boxwidth, area, &dst_argb[x * 4], n);
 
     // Right clipped.
     for (x += n; x <= width - 1; ++x) {
       area -= (bot_y - top_y);
       boxwidth -= 4;
-      CumulativeSumToAverage(cumsum_top_row + (x - radius - 1) * 4,
+      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                              cumsum_bot_row + (x - radius - 1) * 4,
                              boxwidth, area, &dst_argb[x * 4], 1);
     }
@@ -1104,7 +1104,7 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
   }
   void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, int width,
                        uint32 value) = ARGBShadeRow_C;
-#if defined(HAS_ARGBSHADE_SSE2)
+#if defined(HAS_ARGBSHADEROW_SSE2)
   if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
       IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
diff --git a/source/rotate.cc b/source/rotate.cc
index fee14b3b5..86d715d83 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -1057,7 +1057,7 @@ void RotateUV180(const uint8* src, int src_stride,
       IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
     MirrorRowUV = MirrorUVRow_SSSE3;
   }
-#elif defined(HAS_MirrorUVRow_MIPS_DSPR2)
+#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
   if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
     MirrorRowUV = MirrorUVRow_MIPS_DSPR2;
diff --git a/source/row_any.cc b/source/row_any.cc
index 472683c08..07ed399ed 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -294,7 +294,7 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
 #endif
 #undef UV422ANY
 
-#define SPLITUVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                     \
+#define SplitUVRowANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                  \
     void NAMEANY(const uint8* src_uv,                                          \
                  uint8* dst_u, uint8* dst_v, int width) {                      \
       int n = width & ~MASK;                                                   \
@@ -305,21 +305,21 @@ UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON,
                 width & MASK);                                                 \
     }
 
-#ifdef HAS_SPLITUV_SSE2
-SPLITUVANY(SplitUV_Any_SSE2, SplitUV_Unaligned_SSE2, SplitUV_C, 15)
+#ifdef HAS_SPLITUVROW_SSE2
+SplitUVRowANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15)
 #endif
-#ifdef HAS_SPLITUV_AVX2
-SPLITUVANY(SplitUV_Any_AVX2, SplitUV_Unaligned_AVX2, SplitUV_C, 31)
+#ifdef HAS_SPLITUVROW_AVX2
+SplitUVRowANY(SplitUVRow_Any_AVX2, SplitUVRow_Unaligned_AVX2, SplitUVRow_C, 31)
 #endif
-#ifdef HAS_SPLITUV_NEON
-SPLITUVANY(SplitUV_Any_NEON, SplitUV_Unaligned_NEON, SplitUV_C, 15)
+#ifdef HAS_SPLITUVROW_NEON
+SplitUVRowANY(SplitUVRow_Any_NEON, SplitUVRow_Unaligned_NEON, SplitUVRow_C, 15)
 #endif
-#ifdef HAS_SPLITUV_MIPS_DSPR2
-SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15)
+#ifdef HAS_SPLITUVROW_MIPS_DSPR2
+SplitUVRowANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2, SplitUVRow_C, 15)
 #endif
-#undef SPLITUVANY
+#undef SplitUVRowANY
 
-#define MERGEUVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                     \
+#define MergeUVRow_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK)                 \
     void NAMEANY(const uint8* src_u, const uint8* src_v,                       \
                  uint8* dst_uv, int width) {                                   \
       int n = width & ~MASK;                                                   \
@@ -330,16 +330,16 @@ SPLITUVANY(SplitUV_Any_MIPS_DSPR2, SplitUV_Unaligned_MIPS_DSPR2, SplitUV_C, 15)
                  width & MASK);                                                \
     }
 
-#ifdef HAS_MERGEUV_SSE2
-MERGEUVANY(MergeUV_Any_SSE2, MergeUV_Unaligned_SSE2, MergeUV_C, 15)
+#ifdef HAS_MERGEUVROW_SSE2
+MergeUVRow_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15)
 #endif
-#ifdef HAS_MERGEUV_AVX2
-MERGEUVANY(MergeUV_Any_AVX2, MergeUV_Unaligned_AVX2, MergeUV_C, 31)
+#ifdef HAS_MERGEUVROW_AVX2
+MergeUVRow_ANY(MergeUVRow_Any_AVX2, MergeUVRow_Unaligned_AVX2, MergeUVRow_C, 31)
 #endif
-#ifdef HAS_MERGEUV_NEON
-MERGEUVANY(MergeUV_Any_NEON, MergeUV_Unaligned_NEON, MergeUV_C, 15)
+#ifdef HAS_MERGEUVROW_NEON
+MergeUVRow_ANY(MergeUVRow_Any_NEON, MergeUVRow_Unaligned_NEON, MergeUVRow_C, 15)
 #endif
-#undef MERGEUVANY
+#undef MergeUVRow_ANY
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/source/row_common.cc b/source/row_common.cc
index ea2c05456..c20eb16af 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1187,7 +1187,7 @@ void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
   }
 }
 
-void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
+void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
   for (int x = 0; x < width - 1; x += 2) {
     dst_u[x] = src_uv[0];
     dst_u[x + 1] = src_uv[2];
@@ -1201,8 +1201,8 @@ void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
   }
 }
 
-void MergeUV_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-               int width) {
+void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                  int width) {
   for (int x = 0; x < width - 1; x += 2) {
     dst_uv[0] = src_u[x];
     dst_uv[1] = src_v[x];
@@ -1220,7 +1220,7 @@ void CopyRow_C(const uint8* src, uint8* dst, int count) {
   memcpy(dst, src, count);
 }
 
-void SetRow8_C(uint8* dst, uint32 v8, int count) {
+void SetRow_C(uint8* dst, uint32 v8, int count) {
 #ifdef _MSC_VER
   // VC will generate rep stosb.
   for (int x = 0; x < count; ++x) {
@@ -1231,7 +1231,7 @@ void SetRow8_C(uint8* dst, uint32 v8, int count) {
   }
 #endif
 }
-void SetRows32_C(uint8* dst, uint32 v32, int width,
+void ARGBSetRows_C(uint8* dst, uint32 v32, int width,
                  int dst_stride, int height) {
   for (int y = 0; y < height; ++y) {
     uint32* d = reinterpret_cast<uint32*>(dst);
@@ -1498,8 +1498,8 @@ void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
   }
 }
 
-void CumulativeSumToAverage_C(const int32* tl, const int32* bl,
-                              int w, int area, uint8* dst, int count) {
+void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
+                                 int w, int area, uint8* dst, int count) {
   float ooa = 1.0f / area;
   for (int i = 0; i < count; ++i) {
     dst[0] = static_cast<uint8>((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
diff --git a/source/row_mips.cc b/source/row_mips.cc
index 48759e09d..6029f5542 100644
--- a/source/row_mips.cc
+++ b/source/row_mips.cc
@@ -23,9 +23,9 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
 }
 #endif
 
-#ifdef HAS_SPLITUV_MIPS_DSPR2
-void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                        int width) {
+#ifdef HAS_SPLITUVROW_MIPS_DSPR2
+void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                           int width) {
   __asm__ __volatile__ (
     ".set push                                     \n"
     ".set noreorder                                \n"
@@ -90,8 +90,8 @@ void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
   );
 }
 
-void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
-                                  uint8* dst_v, int width) {
+void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
+                                     uint8* dst_v, int width) {
   __asm__ __volatile__ (
     ".set push                                     \n"
     ".set noreorder                                \n"
@@ -171,7 +171,7 @@ void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
       "t4", "t5", "t6", "t7", "t8", "t9"
   );
 }
-#endif  // HAS_SPLITUV_MIPS_DSPR2
+#endif  // HAS_SPLITUVROW_MIPS_DSPR2
 
 #ifdef HAS_MIRRORROW_MIPS_DSPR2
 void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
@@ -225,7 +225,7 @@ void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
 }
 #endif  // HAS_MIRRORROW_MIPS_DSPR2
 
-#ifdef HAS_MirrorUVRow_MIPS_DSPR2
+#ifdef HAS_MIRRORUVROW_MIPS_DSPR2
 void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                             int width) {
   int x = 0;
@@ -315,9 +315,7 @@ void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
       "t5", "t7", "t8", "t9"
   );
 }
-#endif  // HAS_MirrorUVRow_MIPS_DSPR2
-
-
+#endif  // HAS_MIRRORUVROW_MIPS_DSPR2
 
 // Convert (4 Y and 2 VU) I422 and arrange RGB values into
 // t5 = | 0 | B0 | 0 | b0 |
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 566352c90..fdbd13ba2 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -747,10 +747,11 @@ void UYVYToARGBRow_NEON(const uint8* src_uyvy,
 }
 #endif  // HAS_UYVYTOARGBROW_NEON
 
-#ifdef HAS_SPLITUV_NEON
+#ifdef HAS_SPLITUVROW_NEON
 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
 // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
-void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
+void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                     int width) {
   asm volatile (
     ".p2align  2                               \n"
   "1:                                          \n"
@@ -770,8 +771,8 @@ void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
 
 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v
 // Alignment requirement: Multiple of 16 pixels, pointers unaligned.
-void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int width) {
+void SplitUVRow_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int width) {
   asm volatile (
     ".p2align  2                               \n"
   "1:                                          \n"
@@ -788,13 +789,13 @@ void SplitUV_Unaligned_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
     : "memory", "cc", "q0", "q1"  // Clobber List
   );
 }
-#endif  // HAS_SPLITUV_NEON
+#endif  // HAS_SPLITUVROW_NEON
 
-#ifdef HAS_MERGEUV_NEON
+#ifdef HAS_MERGEUVROW_NEON
 // Reads 16 U's and V's and writes out 16 pairs of UV.
 // Alignment requirement: 16 bytes for pointers, and multiple of 16 pixels.
-void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) {
+void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
   asm volatile (
     ".p2align  2                               \n"
   "1:                                          \n"
@@ -814,7 +815,7 @@ void MergeUV_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
 }
 
 // Reads 16 U's and V's and writes out 16 pairs of UV.
-void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
+void MergeUVRow_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
                             uint8* dst_uv, int width) {
   asm volatile (
     ".p2align  2                               \n"
@@ -833,7 +834,7 @@ void MergeUV_Unaligned_NEON(const uint8* src_u, const uint8* src_v,
     : "memory", "cc", "q0", "q1"  // Clobber List
   );
 }
-#endif  // HAS_MERGEUV_NEON
+#endif  // HAS_MERGEUVROW_NEON
 #ifdef HAS_COPYROW_NEON
 // Copy multiple of 32.  vld4.u8 allow unaligned and is fastest on a15.
 void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
@@ -855,7 +856,7 @@ void CopyRow_NEON(const uint8* src, uint8* dst, int count) {
 
 #ifdef HAS_SETROW_NEON
 // SetRow8 writes 'count' bytes using a 32 bit value repeated.
-void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
+void SetRow_NEON(uint8* dst, uint32 v32, int count) {
   asm volatile (
     "vdup.u32  q0, %2                          \n"  // duplicate 4 ints
   "1:                                          \n"
@@ -871,10 +872,10 @@ void SetRow8_NEON(uint8* dst, uint32 v32, int count) {
 
 // TODO(fbarchard): Make fully assembler
 // SetRow32 writes 'count' words using a 32 bit value repeated.
-void SetRows32_NEON(uint8* dst, uint32 v32, int width,
+void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
                     int dst_stride, int height) {
   for (int y = 0; y < height; ++y) {
-    SetRow8_NEON(dst, v32, width << 2);
+    SetRow_NEON(dst, v32, width << 2);
     dst += dst_stride;
   }
 }
diff --git a/source/row_posix.cc b/source/row_posix.cc
index f7c9b1238..5d3f9a5e5 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -2621,8 +2621,8 @@ void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) {
 }
 #endif  // HAS_ARGBMIRRORROW_SSSE3
 
-#ifdef HAS_SPLITUV_SSE2
-void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+#ifdef HAS_SPLITUVROW_SSE2
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
   asm volatile (
     "pcmpeqb   %%xmm5,%%xmm5                     \n"
     "psrlw     $0x8,%%xmm5                       \n"
@@ -2657,8 +2657,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
   );
 }
 
-void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int pix) {
+void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix) {
   asm volatile (
     "pcmpeqb   %%xmm5,%%xmm5                     \n"
     "psrlw     $0x8,%%xmm5                       \n"
@@ -2692,11 +2692,11 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
 #endif
   );
 }
-#endif  // HAS_SPLITUV_SSE2
+#endif  // HAS_SPLITUVROW_SSE2
 
-#ifdef HAS_MERGEUV_SSE2
-void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) {
+#ifdef HAS_MERGEUVROW_SSE2
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
   asm volatile (
     "sub       %0,%1                             \n"
     ".p2align  4                                 \n"
@@ -2724,8 +2724,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
   );
 }
 
-void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
-                            uint8* dst_uv, int width) {
+void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width) {
   asm volatile (
     "sub       %0,%1                             \n"
     ".p2align  4                                 \n"
@@ -2752,7 +2752,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
 #endif
   );
 }
-#endif  // HAS_MERGEUV_SSE2
+#endif  // HAS_MERGEUVROW_SSE2
 
 #ifdef HAS_COPYROW_SSE2
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
@@ -2795,7 +2795,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int width) {
 #endif  // HAS_COPYROW_X86
 
 #ifdef HAS_SETROW_X86
-void SetRow8_X86(uint8* dst, uint32 v32, int width) {
+void SetRow_X86(uint8* dst, uint32 v32, int width) {
   size_t width_tmp = static_cast<size_t>(width);
   asm volatile (
     "shr       $0x2,%1                         \n"
@@ -2806,7 +2806,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int width) {
     : "memory", "cc");
 }
 
-void SetRows32_X86(uint8* dst, uint32 v32, int width,
+void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
                    int dst_stride, int height) {
   for (int y = 0; y < height; ++y) {
     size_t width_tmp = static_cast<size_t>(width);
@@ -4002,9 +4002,10 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
 }
 #endif  // HAS_COMPUTECUMULATIVESUMROW_SSE2
 
-#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2
-void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
-                                 int width, int area, uint8* dst, int count) {
+#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
+                                    int width, int area, uint8* dst,
+                                    int count) {
   asm volatile (
     "movd      %5,%%xmm4                       \n"
     "cvtdq2ps  %%xmm4,%%xmm4                   \n"
@@ -4089,8 +4090,8 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
 #endif
   );
 }
-#endif  // HAS_CUMULATIVESUMTOAVERAGE_SSE2
-#ifdef HAS_ARGBSHADE_SSE2
+#endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
+#ifdef HAS_ARGBSHADEROW_SSE2
 // Shade 4 pixels at a time by specified value.
 // Aligned to 16 bytes.
 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
@@ -4127,7 +4128,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
 #endif
   );
 }
-#endif  // HAS_ARGBSHADE_SSE2
+#endif  // HAS_ARGBSHADEROW_SSE2
 
 #ifdef HAS_ARGBAFFINEROW_SSE2
 // TODO(fbarchard): Find 64 bit way to avoid masking.
diff --git a/source/row_win.cc b/source/row_win.cc
index 87da31699..e96d9739a 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -2767,9 +2767,9 @@ __asm {
 }
 #endif  // HAS_ARGBMIRRORROW_SSSE3
 
-#ifdef HAS_SPLITUV_SSE2
+#ifdef HAS_SPLITUVROW_SSE2
 __declspec(naked) __declspec(align(16))
-void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
   __asm {
     push       edi
     mov        eax, [esp + 4 + 4]    // src_uv
@@ -2805,8 +2805,8 @@ void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
 }
 
 __declspec(naked) __declspec(align(16))
-void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
-                            int pix) {
+void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                               int pix) {
   __asm {
     push       edi
     mov        eax, [esp + 4 + 4]    // src_uv
@@ -2840,12 +2840,12 @@ void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
     ret
   }
 }
-#endif  // HAS_SPLITUV_SSE2
+#endif  // HAS_SPLITUVROW_SSE2
 
-#ifdef HAS_MERGEUV_SSE2
+#ifdef HAS_MERGEUVROW_SSE2
 __declspec(naked) __declspec(align(16))
-void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-                  int width) {
+void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+                     int width) {
   __asm {
     push       edi
     mov        eax, [esp + 4 + 4]    // src_u
@@ -2874,8 +2874,8 @@ void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
 }
 
 __declspec(naked) __declspec(align(16))
-void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
-                            uint8* dst_uv, int width) {
+void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
+                               uint8* dst_uv, int width) {
   __asm {
     push       edi
     mov        eax, [esp + 4 + 4]    // src_u
@@ -2902,7 +2902,7 @@ void MergeUV_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
     ret
   }
 }
-#endif  // HAS_MERGEUV_SSE2
+#endif  // HAS_MERGEUVROW_SSE2
 
 #ifdef HAS_COPYROW_SSE2
 // CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
@@ -2949,7 +2949,7 @@ void CopyRow_X86(const uint8* src, uint8* dst, int count) {
 #ifdef HAS_SETROW_X86
 // SetRow8 writes 'count' bytes using a 32 bit value repeated.
 __declspec(naked) __declspec(align(16))
-void SetRow8_X86(uint8* dst, uint32 v32, int count) {
+void SetRow_X86(uint8* dst, uint32 v32, int count) {
   __asm {
     mov        edx, edi
    mov        edi, [esp + 4]   // dst
@@ -2964,7 +2964,7 @@ void SetRow8_X86(uint8* dst, uint32 v32, int count) {
 
 // SetRow32 writes 'count' words using a 32 bit value repeated.
 __declspec(naked) __declspec(align(16))
-void SetRows32_X86(uint8* dst, uint32 v32, int width,
+void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
                    int dst_stride, int height) {
   __asm {
     push       esi
@@ -4125,7 +4125,7 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
 }
 #endif  // HAS_ARGBQUANTIZEROW_SSE2
 
-#ifdef HAS_CUMULATIVESUMTOAVERAGE_SSE2
+#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
 // Consider float CumulativeSum.
 // Consider calling CumulativeSum one row at time as needed.
 // Consider circular CumulativeSum buffer of radius * 2 + 1 height.
@@ -4139,8 +4139,9 @@ void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
 // count is number of averaged pixels to produce.
 // Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte
 // aligned.
-void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
-                                 int width, int area, uint8* dst, int count) {
+void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
+                                    int width, int area, uint8* dst,
+                                    int count) {
   __asm {
     mov        eax, topleft  // eax topleft
     mov        esi, botleft  // esi botleft
@@ -4228,7 +4229,7 @@ void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft,
   l1b:
   }
 }
-#endif  // HAS_CUMULATIVESUMTOAVERAGE_SSE2
+#endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
 
 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
 // Creates a table of cumulative sums where each value is a sum of all values
@@ -4315,7 +4316,7 @@ void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
 }
 #endif  // HAS_COMPUTECUMULATIVESUMROW_SSE2
 
-#ifdef HAS_ARGBSHADE_SSE2
+#ifdef HAS_ARGBSHADEROW_SSE2
 // Shade 4 pixels at a time by specified value.
 // Aligned to 16 bytes.
 __declspec(naked) __declspec(align(16))
@@ -4349,7 +4350,7 @@ void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
     ret
   }
 }
-#endif  // HAS_ARGBSHADE_SSE2
+#endif  // HAS_ARGBSHADEROW_SSE2
 
 #ifdef HAS_ARGBAFFINEROW_SSE2
 // Copy ARGB pixels from source image with slope to a row of destination.
diff --git a/source/row_x86.asm b/source/row_x86.asm
index 51a45880d..dfc3ae32d 100644
--- a/source/row_x86.asm
+++ b/source/row_x86.asm
@@ -65,10 +65,10 @@ YUY2TOYROW YUY2,u,_Unaligned
 YUY2TOYROW UYVY,a,
 YUY2TOYROW UYVY,u,_Unaligned
 
-; void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
+; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
 
-%macro SPLITUV 1-2
-cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
+%macro SplitUVRow 1-2
+cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
     pcmpeqb    m4, m4, m4        ; generate mask 0x00ff00ff
     psrlw      m4, m4, 8
     sub        dst_vq, dst_uq
@@ -95,20 +95,20 @@ cglobal SplitUV%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
 %endmacro
 
 INIT_MMX MMX
-SPLITUV a,
-SPLITUV u,_Unaligned
+SplitUVRow a,
+SplitUVRow u,_Unaligned
 INIT_XMM SSE2
-SPLITUV a,
-SPLITUV u,_Unaligned
+SplitUVRow a,
+SplitUVRow u,_Unaligned
 INIT_YMM AVX2
-SPLITUV a,
-SPLITUV u,_Unaligned
+SplitUVRow a,
+SplitUVRow u,_Unaligned
 
-; void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-;                   int width);
+; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+;                      int width);
 
-%macro MERGEUV 1-2
-cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix
+%macro MergeUVRow_ 1-2
+cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
     sub        src_vq, src_uq
 
     ALIGN      16
@@ -128,12 +128,12 @@ cglobal MergeUV%2, 4, 4, 3, src_u, src_v, dst_uv, pix
 %endmacro
 
 INIT_MMX MMX
-MERGEUV a,
-MERGEUV u,_Unaligned
+MergeUVRow_ a,
+MergeUVRow_ u,_Unaligned
 INIT_XMM SSE2
-MERGEUV a,
-MERGEUV u,_Unaligned
+MergeUVRow_ a,
+MergeUVRow_ u,_Unaligned
 INIT_YMM AVX2
-MERGEUV a,
-MERGEUV u,_Unaligned
+MergeUVRow_ a,
+MergeUVRow_ u,_Unaligned
 
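
Note, added in editing and not part of the patch itself: the SomethingRow_CPU
convention renamed here gives every row operation four flavors: a portable
SomethingRow_C fallback, SomethingRow_Any_CPU for ragged widths,
SomethingRow_Unaligned_CPU for SIMD on unaligned buffers, and a fully aligned
SomethingRow_CPU kernel, chosen once per call at run time. The sketch below is
a minimal illustration of that selection ladder, modeled on X420ToI420 in
source/convert.cc above. ChooseSplitUVRow is a hypothetical helper name; uint8,
IS_ALIGNED, TestCpuFlag, kCpuHasSSE2 and the SplitUVRow_* prototypes are
assumed to come from libyuv's basic_types.h, cpu_id.h and row.h.

  #include "libyuv/basic_types.h"  // uint8, IS_ALIGNED
  #include "libyuv/cpu_id.h"       // TestCpuFlag, kCpuHasSSE2
  #include "libyuv/row.h"          // SplitUVRow_* kernels, HAS_SPLITUVROW_SSE2

  typedef void (*SplitUVRowFn)(const uint8* src_uv, uint8* dst_u,
                               uint8* dst_v, int pix);

  // Start at the C fallback and promote to the fastest variant that the CPU,
  // the row width and the buffer alignment allow.
  static SplitUVRowFn ChooseSplitUVRow(const uint8* src_uv, int src_stride_uv,
                                       uint8* dst_u, int dst_stride_u,
                                       uint8* dst_v, int dst_stride_v,
                                       int halfwidth) {
    SplitUVRowFn SplitUVRow = SplitUVRow_C;      // always valid
  #if defined(HAS_SPLITUVROW_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
      SplitUVRow = SplitUVRow_Any_SSE2;          // any width >= 16
      if (IS_ALIGNED(halfwidth, 16)) {
        SplitUVRow = SplitUVRow_Unaligned_SSE2;  // width a multiple of 16
        if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
            IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
            IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
          SplitUVRow = SplitUVRow_SSE2;          // fully aligned fast path
        }
      }
    }
  #endif
    return SplitUVRow;
  }

Each output row is then produced with SplitUVRow(src_uv, dst_u, dst_v,
halfwidth), exactly as the loop at the end of X420ToI420 does.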