mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Lint cleanup after C99 change CL
TBR=braveyao@chromium.org
Bug: libyuv:774
Test: git cl lint
Change-Id: I51cf8107a8db17fbc9952d610f3e4d7aac5aa743
Reviewed-on: https://chromium-review.googlesource.com/882217
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
f1c5345046
commit
92e22cf5b6
@ -31,11 +31,16 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a,
|
|||||||
// Scan an opaque argb image and return fourcc based on alpha offset.
|
// Scan an opaque argb image and return fourcc based on alpha offset.
|
||||||
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
|
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height);
|
uint32_t ARGBDetect(const uint8_t* argb,
|
||||||
|
int stride_argb,
|
||||||
|
int width,
|
||||||
|
int height);
|
||||||
|
|
||||||
// Sum Square Error - used to compute Mean Square Error or PSNR.
|
// Sum Square Error - used to compute Mean Square Error or PSNR.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
uint64_t ComputeSumSquareError(const uint8_t* src_a, const uint8_t* src_b, int count);
|
uint64_t ComputeSumSquareError(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
|
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
|
||||||
|
|||||||
@ -90,18 +90,40 @@ extern "C" {
|
|||||||
#define HAS_SUMSQUAREERROR_MSA
|
#define HAS_SUMSQUAREERROR_MSA
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count);
|
uint32_t HammingDistance_C(const uint8_t* src_a,
|
||||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a, const uint8_t* src_b, int count);
|
const uint8_t* src_b,
|
||||||
uint32_t HammingDistance_SSSE3(const uint8_t* src_a, const uint8_t* src_b, int count);
|
int count);
|
||||||
uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count);
|
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||||
uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count);
|
const uint8_t* src_b,
|
||||||
uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int count);
|
int count);
|
||||||
|
uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
uint32_t HammingDistance_NEON(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
uint32_t HammingDistance_MSA(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
|
||||||
uint32_t SumSquareError_C(const uint8_t* src_a, const uint8_t* src_b, int count);
|
uint32_t SumSquareError_C(const uint8_t* src_a,
|
||||||
uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count);
|
const uint8_t* src_b,
|
||||||
uint32_t SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count);
|
int count);
|
||||||
uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count);
|
uint32_t SumSquareError_SSE2(const uint8_t* src_a,
|
||||||
uint32_t SumSquareError_MSA(const uint8_t* src_a, const uint8_t* src_b, int count);
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
uint32_t SumSquareError_AVX2(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
uint32_t SumSquareError_NEON(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
uint32_t SumSquareError_MSA(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count);
|
||||||
|
|
||||||
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
|
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
|
||||||
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
|
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
|
||||||
|
|||||||
@ -352,7 +352,10 @@ int MJPGToI420(const uint8_t* sample,
|
|||||||
|
|
||||||
// Query size of MJPG in pixels.
|
// Query size of MJPG in pixels.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int MJPGSize(const uint8_t* sample, size_t sample_size, int* width, int* height);
|
int MJPGSize(const uint8_t* sample,
|
||||||
|
size_t sample_size,
|
||||||
|
int* width,
|
||||||
|
int* height);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
||||||
|
|||||||
@ -903,8 +903,12 @@ void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
|||||||
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||||
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||||
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
||||||
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
|
||||||
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
|
uint8_t* dst_y,
|
||||||
|
int width);
|
||||||
|
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width);
|
||||||
void BGRAToYRow_MSA(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
void BGRAToYRow_MSA(const uint8_t* src_bgra, uint8_t* dst_y, int width);
|
||||||
void ABGRToYRow_MSA(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
void ABGRToYRow_MSA(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
||||||
void RGBAToYRow_MSA(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
void RGBAToYRow_MSA(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||||
@ -936,7 +940,9 @@ void ABGRToYRow_Any_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
|
|||||||
void RGBAToYRow_Any_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
void RGBAToYRow_Any_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
|
||||||
void RGB24ToYRow_Any_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
void RGB24ToYRow_Any_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||||
void RAWToYRow_Any_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
void RAWToYRow_Any_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||||
void RGB565ToYRow_Any_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
void RGB565ToYRow_Any_NEON(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width);
|
||||||
void ARGB1555ToYRow_Any_NEON(const uint8_t* src_argb1555,
|
void ARGB1555ToYRow_Any_NEON(const uint8_t* src_argb1555,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
int width);
|
int width);
|
||||||
@ -951,7 +957,9 @@ void ARGBToYRow_Any_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
|
|||||||
void RGB24ToYRow_Any_MSA(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
void RGB24ToYRow_Any_MSA(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
|
||||||
void RAWToYRow_Any_MSA(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
void RAWToYRow_Any_MSA(const uint8_t* src_raw, uint8_t* dst_y, int width);
|
||||||
void RGB565ToYRow_Any_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
void RGB565ToYRow_Any_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
|
||||||
void ARGB1555ToYRow_Any_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
|
void ARGB1555ToYRow_Any_MSA(const uint8_t* src_argb1555,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width);
|
||||||
|
|
||||||
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
|
||||||
int src_stride_argb,
|
int src_stride_argb,
|
||||||
@ -1224,7 +1232,10 @@ void MirrorUVRow_MSA(const uint8_t* src_uv,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
|
void MirrorUVRow_C(const uint8_t* src_uv,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width);
|
||||||
|
|
||||||
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
||||||
@ -1236,7 +1247,10 @@ void ARGBMirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
|
|||||||
void ARGBMirrorRow_Any_NEON(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_Any_NEON(const uint8_t* src, uint8_t* dst, int width);
|
||||||
void ARGBMirrorRow_Any_MSA(const uint8_t* src, uint8_t* dst, int width);
|
void ARGBMirrorRow_Any_MSA(const uint8_t* src, uint8_t* dst, int width);
|
||||||
|
|
||||||
void SplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
|
void SplitUVRow_C(const uint8_t* src_uv,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width);
|
||||||
void SplitUVRow_SSE2(const uint8_t* src_uv,
|
void SplitUVRow_SSE2(const uint8_t* src_uv,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1249,7 +1263,10 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
|
|||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
int width);
|
int width);
|
||||||
void SplitUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
|
void SplitUVRow_MSA(const uint8_t* src_uv,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width);
|
||||||
void SplitUVRow_Any_SSE2(const uint8_t* src_uv,
|
void SplitUVRow_Any_SSE2(const uint8_t* src_uv,
|
||||||
uint8_t* dst_u,
|
uint8_t* dst_u,
|
||||||
uint8_t* dst_v,
|
uint8_t* dst_v,
|
||||||
@ -1371,9 +1388,15 @@ void MultiplyRow_16_AVX2(const uint16_t* src_y,
|
|||||||
uint16_t* dst_y,
|
uint16_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
void MultiplyRow_16_C(const uint16_t* src_y, uint16_t* dst_y, int scale, int width);
|
void MultiplyRow_16_C(const uint16_t* src_y,
|
||||||
|
uint16_t* dst_y,
|
||||||
|
int scale,
|
||||||
|
int width);
|
||||||
|
|
||||||
void Convert8To16Row_C(const uint8_t* src_y, uint16_t* dst_y, int scale, int width);
|
void Convert8To16Row_C(const uint8_t* src_y,
|
||||||
|
uint16_t* dst_y,
|
||||||
|
int scale,
|
||||||
|
int width);
|
||||||
void Convert8To16Row_SSE2(const uint8_t* src_y,
|
void Convert8To16Row_SSE2(const uint8_t* src_y,
|
||||||
uint16_t* dst_y,
|
uint16_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
@ -1391,7 +1414,10 @@ void Convert8To16Row_Any_AVX2(const uint8_t* src_y,
|
|||||||
int scale,
|
int scale,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void Convert16To8Row_C(const uint16_t* src_y, uint8_t* dst_y, int scale, int width);
|
void Convert16To8Row_C(const uint16_t* src_y,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int scale,
|
||||||
|
int width);
|
||||||
void Convert16To8Row_SSSE3(const uint16_t* src_y,
|
void Convert16To8Row_SSSE3(const uint16_t* src_y,
|
||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
@ -1422,8 +1448,12 @@ void CopyRow_Any_NEON(const uint8_t* src, uint8_t* dst, int count);
|
|||||||
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
|
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
|
||||||
|
|
||||||
void ARGBCopyAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
void ARGBCopyAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
||||||
void ARGBCopyAlphaRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
void ARGBCopyAlphaRow_SSE2(const uint8_t* src_argb,
|
||||||
void ARGBCopyAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void ARGBCopyAlphaRow_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_argb,
|
void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -1432,10 +1462,18 @@ void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_argb,
|
|||||||
int width);
|
int width);
|
||||||
|
|
||||||
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
|
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
|
||||||
void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, uint8_t* dst_a, int width);
|
void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
|
||||||
void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_a, int width);
|
uint8_t* dst_a,
|
||||||
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width);
|
int width);
|
||||||
void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, uint8_t* dst_a, int width);
|
void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_a,
|
||||||
|
int width);
|
||||||
|
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_a,
|
||||||
|
int width);
|
||||||
|
void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_a,
|
||||||
|
int width);
|
||||||
void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_argb,
|
void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_a,
|
uint8_t* dst_a,
|
||||||
int width);
|
int width);
|
||||||
@ -1450,8 +1488,12 @@ void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_argb,
|
|||||||
int width);
|
int width);
|
||||||
|
|
||||||
void ARGBCopyYToAlphaRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void ARGBCopyYToAlphaRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
||||||
void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src_y,
|
||||||
void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src_y,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_y,
|
void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_y,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -1512,17 +1554,23 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_argb,
|
|||||||
const uint8_t* shuffler,
|
const uint8_t* shuffler,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||||
void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
|
void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555,
|
void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444,
|
void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
|
void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
|
void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -1530,14 +1578,20 @@ void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
||||||
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||||
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
||||||
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
|
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
|
||||||
void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
|
void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -1560,8 +1614,12 @@ void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width);
|
|||||||
void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_rgb24,
|
void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_rgb24,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void RAWToARGBRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_Any_SSSE3(const uint8_t* src_raw,
|
||||||
void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_raw,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
int width);
|
||||||
|
|
||||||
void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_rgb565,
|
void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_rgb565,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
@ -1585,11 +1643,19 @@ void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_argb4444,
|
|||||||
void RGB24ToARGBRow_Any_NEON(const uint8_t* src_rgb24,
|
void RGB24ToARGBRow_Any_NEON(const uint8_t* src_rgb24,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
void RGB24ToARGBRow_Any_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
|
void RGB24ToARGBRow_Any_MSA(const uint8_t* src_rgb24,
|
||||||
void RAWToARGBRow_Any_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void RAWToARGBRow_Any_NEON(const uint8_t* src_raw,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void RAWToARGBRow_Any_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
void RAWToARGBRow_Any_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
|
||||||
void RAWToRGB24Row_Any_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
void RAWToRGB24Row_Any_NEON(const uint8_t* src_raw,
|
||||||
void RAWToRGB24Row_Any_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
|
uint8_t* dst_rgb24,
|
||||||
|
int width);
|
||||||
|
void RAWToRGB24Row_Any_MSA(const uint8_t* src_raw,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
int width);
|
||||||
void RGB565ToARGBRow_Any_NEON(const uint8_t* src_rgb565,
|
void RGB565ToARGBRow_Any_NEON(const uint8_t* src_rgb565,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -1613,8 +1679,12 @@ void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_argb4444,
|
|||||||
void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRAWRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRAWRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb,
|
||||||
void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToAR30Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToAR30Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
|
|
||||||
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
|
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
|
||||||
@ -1631,15 +1701,23 @@ void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
|
|||||||
int width);
|
int width);
|
||||||
|
|
||||||
void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
|
||||||
void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToAR30Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToAR30Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
|
|
||||||
void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
|
||||||
void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
|
void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
const uint32_t dither4,
|
const uint32_t dither4,
|
||||||
@ -1647,8 +1725,12 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
|
|||||||
void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
|
||||||
void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
|
void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
const uint32_t dither4,
|
const uint32_t dither4,
|
||||||
@ -2283,16 +2365,24 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_argb,
|
||||||
void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
uint8_t* dst_rgb,
|
||||||
void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
int width);
|
||||||
|
void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_argb,
|
void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_argb,
|
void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
|
||||||
void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_argb,
|
void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
@ -2303,18 +2393,28 @@ void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_argb,
|
|||||||
const uint32_t dither4,
|
const uint32_t dither4,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_argb,
|
void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_argb,
|
void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToAR30Row_Any_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToAR30Row_Any_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
|
||||||
void ARGBToRGB24Row_Any_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB24Row_Any_NEON(const uint8_t* src_argb,
|
||||||
void ARGBToRAWRow_Any_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
uint8_t* dst_rgb,
|
||||||
void ARGBToRGB565Row_Any_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
int width);
|
||||||
|
void ARGBToRAWRow_Any_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
|
void ARGBToRGB565Row_Any_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_argb,
|
void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
int width);
|
int width);
|
||||||
@ -2325,9 +2425,13 @@ void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
const uint32_t dither4,
|
const uint32_t dither4,
|
||||||
int width);
|
int width);
|
||||||
void ARGBToRGB24Row_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB24Row_Any_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToRAWRow_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRAWRow_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
||||||
void ARGBToRGB565Row_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
|
void ARGBToRGB565Row_Any_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width);
|
||||||
void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_argb,
|
void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_argb,
|
||||||
uint8_t* dst_rgb,
|
uint8_t* dst_rgb,
|
||||||
int width);
|
int width);
|
||||||
@ -2749,10 +2853,18 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* src_y,
|
|||||||
|
|
||||||
// Effects related row functions.
|
// Effects related row functions.
|
||||||
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
||||||
void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
|
||||||
void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
uint8_t* dst_argb,
|
||||||
void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
int width);
|
||||||
void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_argb,
|
void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_argb,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -2768,9 +2880,15 @@ void ARGBAttenuateRow_Any_MSA(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// Inverse table for unattenuate, shared by C and SSE2.
|
// Inverse table for unattenuate, shared by C and SSE2.
|
||||||
extern const uint32_t fixed_invtbl8[256];
|
extern const uint32_t fixed_invtbl8[256];
|
||||||
void ARGBUnattenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
|
||||||
void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
uint8_t* dst_argb,
|
||||||
void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
|
int width);
|
||||||
|
void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
|
void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width);
|
||||||
void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_argb,
|
void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_argb,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width);
|
int width);
|
||||||
@ -2805,11 +2923,19 @@ void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
|
|||||||
const int8_t* matrix_argb,
|
const int8_t* matrix_argb,
|
||||||
int width);
|
int width);
|
||||||
|
|
||||||
void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width);
|
void ARGBColorTableRow_C(uint8_t* dst_argb,
|
||||||
void ARGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width);
|
const uint8_t* table_argb,
|
||||||
|
int width);
|
||||||
|
void ARGBColorTableRow_X86(uint8_t* dst_argb,
|
||||||
|
const uint8_t* table_argb,
|
||||||
|
int width);
|
||||||
|
|
||||||
void RGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width);
|
void RGBColorTableRow_C(uint8_t* dst_argb,
|
||||||
void RGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width);
|
const uint8_t* table_argb,
|
||||||
|
int width);
|
||||||
|
void RGBColorTableRow_X86(uint8_t* dst_argb,
|
||||||
|
const uint8_t* table_argb,
|
||||||
|
int width);
|
||||||
|
|
||||||
void ARGBQuantizeRow_C(uint8_t* dst_argb,
|
void ARGBQuantizeRow_C(uint8_t* dst_argb,
|
||||||
int scale,
|
int scale,
|
||||||
@ -3075,37 +3201,58 @@ void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// Scale and convert to half float.
|
// Scale and convert to half float.
|
||||||
void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width);
|
||||||
void HalfFloatRow_SSE2(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloatRow_SSE2(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloatRow_Any_SSE2(const uint16_t* src,
|
void HalfFloatRow_Any_SSE2(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
int width);
|
int width);
|
||||||
void HalfFloatRow_AVX2(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloatRow_AVX2(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloatRow_Any_AVX2(const uint16_t* src,
|
void HalfFloatRow_Any_AVX2(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
int width);
|
int width);
|
||||||
void HalfFloatRow_F16C(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloatRow_F16C(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloatRow_Any_F16C(const uint16_t* src,
|
void HalfFloatRow_Any_F16C(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
int width);
|
int width);
|
||||||
void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloat1Row_F16C(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloat1Row_Any_F16C(const uint16_t* src,
|
void HalfFloat1Row_Any_F16C(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
int width);
|
int width);
|
||||||
void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloatRow_NEON(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloatRow_Any_NEON(const uint16_t* src,
|
void HalfFloatRow_Any_NEON(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
int width);
|
int width);
|
||||||
void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloat1Row_NEON(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloat1Row_Any_NEON(const uint16_t* src,
|
void HalfFloat1Row_Any_NEON(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
int width);
|
int width);
|
||||||
void HalfFloatRow_MSA(const uint16_t* src, uint16_t* dst, float scale, int width);
|
void HalfFloatRow_MSA(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width);
|
||||||
void HalfFloatRow_Any_MSA(const uint16_t* src,
|
void HalfFloatRow_Any_MSA(const uint16_t* src,
|
||||||
uint16_t* dst,
|
uint16_t* dst,
|
||||||
float scale,
|
float scale,
|
||||||
|
|||||||
@ -302,7 +302,9 @@ void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
|
|||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
||||||
void ScaleAddRow_16_C(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width);
|
void ScaleAddRow_16_C(const uint16_t* src_ptr,
|
||||||
|
uint32_t* dst_ptr,
|
||||||
|
int src_width);
|
||||||
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
|
void ScaleARGBRowDown2_C(const uint8_t* src_argb,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
@ -493,8 +495,12 @@ void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr,
|
|||||||
|
|
||||||
void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
||||||
void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
||||||
void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr,
|
||||||
void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
uint16_t* dst_ptr,
|
||||||
|
int src_width);
|
||||||
|
void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr,
|
||||||
|
uint16_t* dst_ptr,
|
||||||
|
int src_width);
|
||||||
|
|
||||||
void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
|
void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
|
||||||
const uint8_t* src_ptr,
|
const uint8_t* src_ptr,
|
||||||
@ -810,7 +816,9 @@ void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr,
|
|||||||
int dst_width);
|
int dst_width);
|
||||||
|
|
||||||
void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
||||||
void ScaleAddRow_Any_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
|
void ScaleAddRow_Any_NEON(const uint8_t* src_ptr,
|
||||||
|
uint16_t* dst_ptr,
|
||||||
|
int src_width);
|
||||||
|
|
||||||
void ScaleFilterCols_NEON(uint8_t* dst_ptr,
|
void ScaleFilterCols_NEON(uint8_t* dst_ptr,
|
||||||
const uint8_t* src_ptr,
|
const uint8_t* src_ptr,
|
||||||
|
|||||||
@ -32,7 +32,8 @@ LIBYUV_API
|
|||||||
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
|
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
|
||||||
const int kBlockSize = 1 << 15; // 32768;
|
const int kBlockSize = 1 << 15; // 32768;
|
||||||
int remainder;
|
int remainder;
|
||||||
uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) = HashDjb2_C;
|
uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) =
|
||||||
|
HashDjb2_C;
|
||||||
#if defined(HAS_HASHDJB2_SSE41)
|
#if defined(HAS_HASHDJB2_SSE41)
|
||||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||||
HashDjb2_SSE = HashDjb2_SSE41;
|
HashDjb2_SSE = HashDjb2_SSE41;
|
||||||
@ -93,7 +94,10 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
|
|||||||
// Scan an opaque argb image and return fourcc based on alpha offset.
|
// Scan an opaque argb image and return fourcc based on alpha offset.
|
||||||
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
|
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height) {
|
uint32_t ARGBDetect(const uint8_t* argb,
|
||||||
|
int stride_argb,
|
||||||
|
int width,
|
||||||
|
int height) {
|
||||||
uint32_t fourcc = 0;
|
uint32_t fourcc = 0;
|
||||||
int h;
|
int h;
|
||||||
|
|
||||||
@ -123,8 +127,8 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a,
|
|||||||
int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
|
int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
|
||||||
uint64_t diff = 0;
|
uint64_t diff = 0;
|
||||||
int i;
|
int i;
|
||||||
uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b, int count) =
|
uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b,
|
||||||
HammingDistance_C;
|
int count) = HammingDistance_C;
|
||||||
#if defined(HAS_HAMMINGDISTANCE_NEON)
|
#if defined(HAS_HAMMINGDISTANCE_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
HammingDistance = HammingDistance_NEON;
|
HammingDistance = HammingDistance_NEON;
|
||||||
@ -182,8 +186,8 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a,
|
|||||||
int remainder = count & (kBlockSize - 1) & ~31;
|
int remainder = count & (kBlockSize - 1) & ~31;
|
||||||
uint64_t sse = 0;
|
uint64_t sse = 0;
|
||||||
int i;
|
int i;
|
||||||
uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, int count) =
|
uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
|
||||||
SumSquareError_C;
|
int count) = SumSquareError_C;
|
||||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
SumSquareError = SumSquareError_NEON;
|
SumSquareError = SumSquareError_NEON;
|
||||||
@ -293,8 +297,8 @@ double I420Psnr(const uint8_t* src_y_a,
|
|||||||
int stride_v_b,
|
int stride_v_b,
|
||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
const uint64_t sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, src_y_b,
|
const uint64_t sse_y = ComputeSumSquareErrorPlane(
|
||||||
stride_y_b, width, height);
|
src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
|
||||||
const int width_uv = (width + 1) >> 1;
|
const int width_uv = (width + 1) >> 1;
|
||||||
const int height_uv = (height + 1) >> 1;
|
const int height_uv = (height + 1) >> 1;
|
||||||
const uint64_t sse_u = ComputeSumSquareErrorPlane(
|
const uint64_t sse_u = ComputeSumSquareErrorPlane(
|
||||||
|
|||||||
@ -18,7 +18,9 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if ORIGINAL_OPT
|
#if ORIGINAL_OPT
|
||||||
uint32_t HammingDistance_C1(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t HammingDistance_C1(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t diff = 0u;
|
uint32_t diff = 0u;
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
@ -46,12 +48,14 @@ uint32_t HammingDistance_C1(const uint8_t* src_a, const uint8_t* src_b, int coun
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Hakmem method for hamming distance.
|
// Hakmem method for hamming distance.
|
||||||
uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t HammingDistance_C(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t diff = 0u;
|
uint32_t diff = 0u;
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < count - 3; i += 4) {
|
for (i = 0; i < count - 3; i += 4) {
|
||||||
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b);
|
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
|
||||||
uint32_t u = x - ((x >> 1) & 0x55555555);
|
uint32_t u = x - ((x >> 1) & 0x55555555);
|
||||||
u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
|
u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
|
||||||
diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
|
diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
|
||||||
@ -71,7 +75,9 @@ uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count
|
|||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t SumSquareError_C(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t SumSquareError_C(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t sse = 0u;
|
uint32_t sse = 0u;
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < count; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
|
|||||||
@ -174,7 +174,9 @@ uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_HAMMINGDISTANCE_AVX2
|
#ifdef HAS_HAMMINGDISTANCE_AVX2
|
||||||
uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t diff = 0u;
|
uint32_t diff = 0u;
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
@ -227,11 +229,14 @@ uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int co
|
|||||||
}
|
}
|
||||||
#endif // HAS_HAMMINGDISTANCE_AVX2
|
#endif // HAS_HAMMINGDISTANCE_AVX2
|
||||||
|
|
||||||
uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t SumSquareError_SSE2(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t sse;
|
uint32_t sse;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"pxor %%xmm0,%%xmm0 \n"
|
"pxor %%xmm0,%%xmm0 \n"
|
||||||
"pxor %%xmm5,%%xmm5 \n"
|
"pxor %%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm1 \n"
|
"movdqu (%0),%%xmm1 \n"
|
||||||
@ -262,8 +267,8 @@ uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int cou
|
|||||||
"+r"(src_b), // %1
|
"+r"(src_b), // %1
|
||||||
"+r"(count), // %2
|
"+r"(count), // %2
|
||||||
"=g"(sse) // %3
|
"=g"(sse) // %3
|
||||||
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
|
||||||
return sse;
|
return sse;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -299,6 +304,7 @@ uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
|
|||||||
"movd %2,%%xmm0 \n"
|
"movd %2,%%xmm0 \n"
|
||||||
"pxor %%xmm7,%%xmm7 \n"
|
"pxor %%xmm7,%%xmm7 \n"
|
||||||
"movdqa %4,%%xmm6 \n"
|
"movdqa %4,%%xmm6 \n"
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm1 \n"
|
"movdqu (%0),%%xmm1 \n"
|
||||||
@ -342,9 +348,8 @@ uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
|
|||||||
"m"(kHashMul1), // %6
|
"m"(kHashMul1), // %6
|
||||||
"m"(kHashMul2), // %7
|
"m"(kHashMul2), // %7
|
||||||
"m"(kHashMul3) // %8
|
"m"(kHashMul3) // %8
|
||||||
: "memory", "cc"
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
"xmm7");
|
||||||
);
|
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
|
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
|
||||||
|
|||||||
@ -22,7 +22,9 @@ namespace libyuv {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t HammingDistance_MSA(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t diff = 0u;
|
uint32_t diff = 0u;
|
||||||
int i;
|
int i;
|
||||||
v16u8 src0, src1, src2, src3;
|
v16u8 src0, src1, src2, src3;
|
||||||
@ -47,7 +49,9 @@ uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int cou
|
|||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t SumSquareError_MSA(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t SumSquareError_MSA(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t sse = 0u;
|
uint32_t sse = 0u;
|
||||||
int i;
|
int i;
|
||||||
v16u8 src0, src1, src2, src3;
|
v16u8 src0, src1, src2, src3;
|
||||||
|
|||||||
@ -23,7 +23,9 @@ extern "C" {
|
|||||||
|
|
||||||
// 256 bits at a time
|
// 256 bits at a time
|
||||||
// uses short accumulator which restricts count to 131 KB
|
// uses short accumulator which restricts count to 131 KB
|
||||||
uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t HammingDistance_NEON(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t diff;
|
uint32_t diff;
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
@ -52,7 +54,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int co
|
|||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t SumSquareError_NEON(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t sse;
|
uint32_t sse;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vmov.u8 q8, #0 \n"
|
"vmov.u8 q8, #0 \n"
|
||||||
|
|||||||
@ -22,7 +22,9 @@ extern "C" {
|
|||||||
|
|
||||||
// 256 bits at a time
|
// 256 bits at a time
|
||||||
// uses short accumulator which restricts count to 131 KB
|
// uses short accumulator which restricts count to 131 KB
|
||||||
uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t HammingDistance_NEON(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t diff;
|
uint32_t diff;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v4.8h, #0 \n"
|
"movi v4.8h, #0 \n"
|
||||||
@ -47,7 +49,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int co
|
|||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
uint32_t SumSquareError_NEON(const uint8_t* src_a,
|
||||||
|
const uint8_t* src_b,
|
||||||
|
int count) {
|
||||||
uint32_t sse;
|
uint32_t sse;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"eor v16.16b, v16.16b, v16.16b \n"
|
"eor v16.16b, v16.16b, v16.16b \n"
|
||||||
|
|||||||
@ -32,7 +32,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
|||||||
|
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < count - 3; i += 4) {
|
for (i = 0; i < count - 3; i += 4) {
|
||||||
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b);
|
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
|
||||||
src_a += 4;
|
src_a += 4;
|
||||||
src_b += 4;
|
src_b += 4;
|
||||||
diff += __popcnt(x);
|
diff += __popcnt(x);
|
||||||
|
|||||||
@ -451,8 +451,9 @@ int YUY2ToI420(const uint8_t* src_yuy2,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u,
|
void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2,
|
||||||
uint8_t* dst_v, int width) = YUY2ToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
YUY2ToUVRow_C;
|
||||||
void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
|
void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
|
||||||
YUY2ToYRow_C;
|
YUY2ToYRow_C;
|
||||||
// Negative height means invert the image.
|
// Negative height means invert the image.
|
||||||
@ -531,8 +532,9 @@ int UYVYToI420(const uint8_t* src_uyvy,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u,
|
void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy,
|
||||||
uint8_t* dst_v, int width) = UYVYToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
UYVYToUVRow_C;
|
||||||
void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
|
void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
|
||||||
UYVYToYRow_C;
|
UYVYToYRow_C;
|
||||||
// Negative height means invert the image.
|
// Negative height means invert the image.
|
||||||
@ -611,8 +613,9 @@ int ARGBToI420(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -706,8 +709,9 @@ int BGRAToI420(const uint8_t* src_bgra,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, uint8_t* dst_u,
|
void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra,
|
||||||
uint8_t* dst_v, int width) = BGRAToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
BGRAToUVRow_C;
|
||||||
void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) =
|
void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) =
|
||||||
BGRAToYRow_C;
|
BGRAToYRow_C;
|
||||||
if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -791,8 +795,9 @@ int ABGRToI420(const uint8_t* src_abgr,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, uint8_t* dst_u,
|
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
|
||||||
uint8_t* dst_v, int width) = ABGRToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ABGRToUVRow_C;
|
||||||
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
|
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
|
||||||
ABGRToYRow_C;
|
ABGRToYRow_C;
|
||||||
if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -876,8 +881,9 @@ int RGBAToI420(const uint8_t* src_rgba,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, uint8_t* dst_u,
|
void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba,
|
||||||
uint8_t* dst_v, int width) = RGBAToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
RGBAToUVRow_C;
|
||||||
void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) =
|
void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) =
|
||||||
RGBAToYRow_C;
|
RGBAToYRow_C;
|
||||||
if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -963,14 +969,16 @@ int RGB24ToI420(const uint8_t* src_rgb24,
|
|||||||
int y;
|
int y;
|
||||||
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
|
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
|
||||||
void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
|
void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
RGB24ToUVRow_C;
|
||||||
void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
|
void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
|
||||||
RGB24ToYRow_C;
|
RGB24ToYRow_C;
|
||||||
#else
|
#else
|
||||||
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||||
RGB24ToARGBRow_C;
|
RGB24ToARGBRow_C;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
#endif
|
#endif
|
||||||
@ -1099,8 +1107,9 @@ int RAWToI420(const uint8_t* src_raw,
|
|||||||
#else
|
#else
|
||||||
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
||||||
RAWToARGBRow_C;
|
RAWToARGBRow_C;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
#endif
|
#endif
|
||||||
@ -1228,10 +1237,11 @@ int RGB565ToI420(const uint8_t* src_rgb565,
|
|||||||
void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) =
|
void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) =
|
||||||
RGB565ToYRow_C;
|
RGB565ToYRow_C;
|
||||||
#else
|
#else
|
||||||
void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
|
||||||
RGB565ToARGBRow_C;
|
int width) = RGB565ToARGBRow_C;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
#endif
|
#endif
|
||||||
@ -1362,13 +1372,14 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
|
|||||||
void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
|
void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
ARGB1555ToUVRow_C;
|
ARGB1555ToUVRow_C;
|
||||||
void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y, int width) =
|
void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y,
|
||||||
ARGB1555ToYRow_C;
|
int width) = ARGB1555ToYRow_C;
|
||||||
#else
|
#else
|
||||||
void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
|
||||||
ARGB1555ToARGBRow_C;
|
int width) = ARGB1555ToARGBRow_C;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
#endif
|
#endif
|
||||||
@ -1503,13 +1514,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
|
|||||||
void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444,
|
void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
ARGB4444ToUVRow_C;
|
ARGB4444ToUVRow_C;
|
||||||
void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y, int width) =
|
void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y,
|
||||||
ARGB4444ToYRow_C;
|
int width) = ARGB4444ToYRow_C;
|
||||||
#else
|
#else
|
||||||
void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
|
void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
|
||||||
ARGB4444ToARGBRow_C;
|
int width) = ARGB4444ToARGBRow_C;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -776,8 +776,8 @@ static int I420AlphaToARGBMatrix(const uint8_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) = I422AlphaToARGBRow_C;
|
int width) = I422AlphaToARGBRow_C;
|
||||||
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
|
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
ARGBAttenuateRow_C;
|
int width) = ARGBAttenuateRow_C;
|
||||||
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
|
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1244,8 +1244,8 @@ int RGB565ToARGB(const uint8_t* src_rgb565,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) =
|
void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb,
|
||||||
RGB565ToARGBRow_C;
|
int width) = RGB565ToARGBRow_C;
|
||||||
if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) {
|
if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1481,9 +1481,9 @@ static int NV12ToARGBMatrix(const uint8_t* src_y,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*NV12ToARGBRow)(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
|
void (*NV12ToARGBRow)(
|
||||||
const struct YuvConstants* yuvconstants, int width) =
|
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
|
||||||
NV12ToARGBRow_C;
|
const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
|
||||||
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
|
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1548,9 +1548,9 @@ static int NV21ToARGBMatrix(const uint8_t* src_y,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*NV21ToARGBRow)(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
|
void (*NV21ToARGBRow)(
|
||||||
const struct YuvConstants* yuvconstants, int width) =
|
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
|
||||||
NV21ToARGBRow_C;
|
const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C;
|
||||||
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
|
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1670,9 +1670,9 @@ int M420ToARGB(const uint8_t* src_m420,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*NV12ToARGBRow)(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
|
void (*NV12ToARGBRow)(
|
||||||
const struct YuvConstants* yuvconstants, int width) =
|
const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
|
||||||
NV12ToARGBRow_C;
|
const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
|
||||||
if (!src_m420 || !dst_argb || width <= 0 || height == 0) {
|
if (!src_m420 || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -35,8 +35,8 @@ int ARGBToI444(const uint8_t* src_argb,
|
|||||||
int y;
|
int y;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v,
|
void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u,
|
||||||
int width) = ARGBToUV444Row_C;
|
uint8_t* dst_v, int width) = ARGBToUV444Row_C;
|
||||||
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -133,8 +133,9 @@ int ARGBToI422(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -229,12 +230,13 @@ int ARGBToNV12(const uint8_t* src_argb,
|
|||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
int halfwidth = (width + 1) >> 1;
|
int halfwidth = (width + 1) >> 1;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv,
|
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
|
||||||
int width) = MergeUVRow_C;
|
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||||
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -364,12 +366,13 @@ int ARGBToNV21(const uint8_t* src_argb,
|
|||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
int halfwidth = (width + 1) >> 1;
|
int halfwidth = (width + 1) >> 1;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv,
|
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
|
||||||
int width) = MergeUVRow_C;
|
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||||
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -496,8 +499,9 @@ int ARGBToYUY2(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
|
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
|
||||||
@ -624,8 +628,9 @@ int ARGBToUYVY(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u,
|
void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
|
||||||
uint8_t* dst_v, int width) = ARGBToUVRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVRow_C;
|
||||||
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
|
||||||
ARGBToYRow_C;
|
ARGBToYRow_C;
|
||||||
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
|
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
|
||||||
@ -1005,7 +1010,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
|
|||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
|
ARGBToRGB565DitherRow(src_argb, dst_rgb565,
|
||||||
*(uint32_t*)(dither4x4 + ((y & 3) << 2)),
|
*(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT
|
||||||
width); /* NOLINT */
|
width); /* NOLINT */
|
||||||
src_argb += src_stride_argb;
|
src_argb += src_stride_argb;
|
||||||
dst_rgb565 += dst_stride_rgb565;
|
dst_rgb565 += dst_stride_rgb565;
|
||||||
@ -1023,8 +1028,8 @@ int ARGBToRGB565(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
|
void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
|
||||||
ARGBToRGB565Row_C;
|
int width) = ARGBToRGB565Row_C;
|
||||||
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
|
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1089,8 +1094,8 @@ int ARGBToARGB1555(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
|
void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
|
||||||
ARGBToARGB1555Row_C;
|
int width) = ARGBToARGB1555Row_C;
|
||||||
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
|
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1155,8 +1160,8 @@ int ARGBToARGB4444(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) =
|
void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
|
||||||
ARGBToARGB4444Row_C;
|
int width) = ARGBToARGB4444Row_C;
|
||||||
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
|
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1275,7 +1280,8 @@ int ARGBToJ420(const uint8_t* src_argb,
|
|||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
|
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVJRow_C;
|
||||||
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
|
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
|
||||||
ARGBToYJRow_C;
|
ARGBToYJRow_C;
|
||||||
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -1368,7 +1374,8 @@ int ARGBToJ422(const uint8_t* src_argb,
|
|||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
|
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
|
||||||
uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C;
|
uint8_t* dst_u, uint8_t* dst_v, int width) =
|
||||||
|
ARGBToUVJRow_C;
|
||||||
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
|
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
|
||||||
ARGBToYJRow_C;
|
ARGBToYJRow_C;
|
||||||
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
|
|||||||
@ -89,7 +89,10 @@ static void JpegI400ToI420(void* opaque,
|
|||||||
|
|
||||||
// Query size of MJPG in pixels.
|
// Query size of MJPG in pixels.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int MJPGSize(const uint8_t* sample, size_t sample_size, int* width, int* height) {
|
int MJPGSize(const uint8_t* sample,
|
||||||
|
size_t sample_size,
|
||||||
|
int* width,
|
||||||
|
int* height) {
|
||||||
MJpegDecoder mjpeg_decoder;
|
MJpegDecoder mjpeg_decoder;
|
||||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
|||||||
@ -430,8 +430,8 @@ void MergeUVPlane(const uint8_t* src_u,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv,
|
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
|
||||||
int width) = MergeUVRow_C;
|
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||||
// Coalesce rows.
|
// Coalesce rows.
|
||||||
// Negative height means invert the image.
|
// Negative height means invert the image.
|
||||||
if (height < 0) {
|
if (height < 0) {
|
||||||
@ -673,8 +673,8 @@ int YUY2ToI422(const uint8_t* src_yuy2,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v,
|
void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
|
||||||
int width) = YUY2ToUV422Row_C;
|
uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
|
||||||
void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
|
void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
|
||||||
YUY2ToYRow_C;
|
YUY2ToYRow_C;
|
||||||
if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -759,8 +759,8 @@ int UYVYToI422(const uint8_t* src_uyvy,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v,
|
void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
|
||||||
int width) = UYVYToUV422Row_C;
|
uint8_t* dst_v, int width) = UYVYToUV422Row_C;
|
||||||
void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
|
void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
|
||||||
UYVYToYRow_C;
|
UYVYToYRow_C;
|
||||||
if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
|
||||||
@ -1287,8 +1287,8 @@ int ARGBMultiply(const uint8_t* src_argb0,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
|
void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
|
||||||
int width) = ARGBMultiplyRow_C;
|
uint8_t* dst, int width) = ARGBMultiplyRow_C;
|
||||||
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
|
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1436,8 +1436,8 @@ int ARGBSubtract(const uint8_t* src_argb0,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
|
void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
|
||||||
int width) = ARGBSubtractRow_C;
|
uint8_t* dst, int width) = ARGBSubtractRow_C;
|
||||||
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
|
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1822,7 +1822,8 @@ int ARGBRect(uint8_t* dst_argb,
|
|||||||
int height,
|
int height,
|
||||||
uint32_t value) {
|
uint32_t value) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) = ARGBSetRow_C;
|
void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
|
||||||
|
ARGBSetRow_C;
|
||||||
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
|
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -1890,8 +1891,8 @@ int ARGBAttenuate(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
|
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
ARGBAttenuateRow_C;
|
int width) = ARGBAttenuateRow_C;
|
||||||
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
|
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -2399,9 +2400,9 @@ int ARGBBlur(const uint8_t* src_argb,
|
|||||||
void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
|
void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
|
||||||
const int32_t* previous_cumsum, int width) =
|
const int32_t* previous_cumsum, int width) =
|
||||||
ComputeCumulativeSumRow_C;
|
ComputeCumulativeSumRow_C;
|
||||||
void (*CumulativeSumToAverageRow)(const int32_t* topleft, const int32_t* botleft,
|
void (*CumulativeSumToAverageRow)(
|
||||||
int width, int area, uint8_t* dst,
|
const int32_t* topleft, const int32_t* botleft, int width, int area,
|
||||||
int count) = CumulativeSumToAverageRow_C;
|
uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
|
||||||
int32_t* cumsum_bot_row;
|
int32_t* cumsum_bot_row;
|
||||||
int32_t* max_cumsum_bot_row;
|
int32_t* max_cumsum_bot_row;
|
||||||
int32_t* cumsum_top_row;
|
int32_t* cumsum_top_row;
|
||||||
@ -2752,8 +2753,8 @@ static int ARGBSobelize(const uint8_t* src_argb,
|
|||||||
int y;
|
int y;
|
||||||
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
|
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
|
||||||
ARGBToYJRow_C;
|
ARGBToYJRow_C;
|
||||||
void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely,
|
void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
|
||||||
int width) = SobelYRow_C;
|
uint8_t* dst_sobely, int width) = SobelYRow_C;
|
||||||
void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
|
void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
|
||||||
const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
|
const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
|
||||||
SobelXRow_C;
|
SobelXRow_C;
|
||||||
@ -3052,8 +3053,8 @@ int HalfFloatPlane(const uint16_t* src_y,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale, int width) =
|
void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
|
||||||
HalfFloatRow_C;
|
int width) = HalfFloatRow_C;
|
||||||
if (!src_y || !dst_y || width <= 0 || height == 0) {
|
if (!src_y || !dst_y || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -3133,8 +3134,8 @@ int ARGBLumaColorTable(const uint8_t* src_argb,
|
|||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBLumaColorTableRow)(
|
void (*ARGBLumaColorTableRow)(
|
||||||
const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma,
|
const uint8_t* src_argb, uint8_t* dst_argb, int width,
|
||||||
const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
|
const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
|
||||||
if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
|
if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -3173,8 +3174,8 @@ int ARGBCopyAlpha(const uint8_t* src_argb,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
|
void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
|
||||||
ARGBCopyAlphaRow_C;
|
int width) = ARGBCopyAlphaRow_C;
|
||||||
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
|
if (!src_argb || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -3238,8 +3239,8 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
|
|||||||
height = 1;
|
height = 1;
|
||||||
src_stride = dst_stride = 0;
|
src_stride = dst_stride = 0;
|
||||||
}
|
}
|
||||||
void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, int width) =
|
void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
|
||||||
ARGBExtractAlphaRow_C;
|
int width) = ARGBExtractAlphaRow_C;
|
||||||
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
|
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
|
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
|
||||||
@ -3282,8 +3283,8 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int y;
|
int y;
|
||||||
void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) =
|
void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
|
||||||
ARGBCopyYToAlphaRow_C;
|
int width) = ARGBCopyYToAlphaRow_C;
|
||||||
if (!src_y || !dst_argb || width <= 0 || height == 0) {
|
if (!src_y || !dst_argb || width <= 0 || height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -314,8 +314,8 @@ void RotateUV180(const uint8_t* src,
|
|||||||
int width,
|
int width,
|
||||||
int height) {
|
int height) {
|
||||||
int i;
|
int i;
|
||||||
void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, int width) =
|
void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
|
||||||
MirrorUVRow_C;
|
int width) = MirrorUVRow_C;
|
||||||
#if defined(HAS_MIRRORUVROW_NEON)
|
#if defined(HAS_MIRRORUVROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
|
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
|
||||||
MirrorUVRow = MirrorUVRow_NEON;
|
MirrorUVRow = MirrorUVRow_NEON;
|
||||||
|
|||||||
@ -19,8 +19,8 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
|
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||||
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, \
|
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
|
||||||
int width) { \
|
int dst_stride, int width) { \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
int n = width - r; \
|
int n = width - r; \
|
||||||
if (n > 0) { \
|
if (n > 0) { \
|
||||||
@ -45,7 +45,8 @@ TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
|
|||||||
|
|
||||||
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
|
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||||
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
|
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
|
||||||
int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { \
|
int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
|
||||||
|
int width) { \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
int n = width - r; \
|
int n = width - r; \
|
||||||
if (n > 0) { \
|
if (n > 0) { \
|
||||||
|
|||||||
@ -32,8 +32,8 @@ extern "C" {
|
|||||||
|
|
||||||
// Any 4 planes to 1 with yuvconstants
|
// Any 4 planes to 1 with yuvconstants
|
||||||
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||||
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, \
|
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
|
||||||
const uint8_t* a_buf, uint8_t* dst_ptr, \
|
const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
|
||||||
const struct YuvConstants* yuvconstants, int width) { \
|
const struct YuvConstants* yuvconstants, int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[64 * 5]); \
|
SIMD_ALIGNED(uint8_t temp[64 * 5]); \
|
||||||
memset(temp, 0, 64 * 4); /* for msan */ \
|
memset(temp, 0, 64 * 4); /* for msan */ \
|
||||||
@ -68,8 +68,8 @@ ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
|
|||||||
|
|
||||||
// Any 3 planes to 1.
|
// Any 3 planes to 1.
|
||||||
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||||
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, \
|
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
|
||||||
uint8_t* dst_ptr, int width) { \
|
const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[64 * 4]); \
|
SIMD_ALIGNED(uint8_t temp[64 * 4]); \
|
||||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
@ -120,9 +120,9 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
|
|||||||
// on arm that subsamples 444 to 422 internally.
|
// on arm that subsamples 444 to 422 internally.
|
||||||
// Any 3 planes to 1 with yuvconstants
|
// Any 3 planes to 1 with yuvconstants
|
||||||
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
|
||||||
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, \
|
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
|
||||||
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
|
const uint8_t* v_buf, uint8_t* dst_ptr, \
|
||||||
int width) { \
|
const struct YuvConstants* yuvconstants, int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[64 * 4]); \
|
SIMD_ALIGNED(uint8_t temp[64 * 4]); \
|
||||||
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
@ -200,8 +200,9 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
|
|||||||
// Any 3 planes of 16 bit to 1 with yuvconstants
|
// Any 3 planes of 16 bit to 1 with yuvconstants
|
||||||
// TODO(fbarchard): consider sharing this code with ANY31C
|
// TODO(fbarchard): consider sharing this code with ANY31C
|
||||||
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
|
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
|
||||||
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8_t* dst_ptr, \
|
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
|
||||||
const struct YuvConstants* yuvconstants, int width) { \
|
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
|
||||||
|
int width) { \
|
||||||
SIMD_ALIGNED(T temp[16 * 3]); \
|
SIMD_ALIGNED(T temp[16 * 3]); \
|
||||||
SIMD_ALIGNED(uint8_t out[64]); \
|
SIMD_ALIGNED(uint8_t out[64]); \
|
||||||
memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
|
memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
|
||||||
@ -755,19 +756,38 @@ ANY11C(Convert16To8Row_Any_SSSE3,
|
|||||||
15)
|
15)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_CONVERT16TO8ROW_AVX2
|
#ifdef HAS_CONVERT16TO8ROW_AVX2
|
||||||
ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16_t, uint8_t, 31)
|
ANY11C(Convert16To8Row_Any_AVX2,
|
||||||
|
Convert16To8Row_AVX2,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
uint16_t,
|
||||||
|
uint8_t,
|
||||||
|
31)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_CONVERT8TO16ROW_SSE2
|
#ifdef HAS_CONVERT8TO16ROW_SSE2
|
||||||
ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8_t, uint16_t, 15)
|
ANY11C(Convert8To16Row_Any_SSE2,
|
||||||
|
Convert8To16Row_SSE2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
uint16_t,
|
||||||
|
15)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_CONVERT8TO16ROW_AVX2
|
#ifdef HAS_CONVERT8TO16ROW_AVX2
|
||||||
ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8_t, uint16_t, 31)
|
ANY11C(Convert8To16Row_Any_AVX2,
|
||||||
|
Convert8To16Row_AVX2,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
uint8_t,
|
||||||
|
uint16_t,
|
||||||
|
31)
|
||||||
#endif
|
#endif
|
||||||
#undef ANY11C
|
#undef ANY11C
|
||||||
|
|
||||||
// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
|
// Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts.
|
||||||
#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
|
||||||
void NAMEANY(const uint16_t* src_ptr, uint16_t* dst_ptr, T param, int width) { \
|
void NAMEANY(const uint16_t* src_ptr, uint16_t* dst_ptr, T param, \
|
||||||
|
int width) { \
|
||||||
SIMD_ALIGNED(uint16_t temp[32 * 2]); \
|
SIMD_ALIGNED(uint16_t temp[32 * 2]); \
|
||||||
memset(temp, 0, 64); /* for msan */ \
|
memset(temp, 0, 64); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
@ -834,8 +854,8 @@ ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
|
|||||||
|
|
||||||
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
// Any 1 to 1 interpolate. Takes 2 rows of source via stride.
|
||||||
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||||
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, \
|
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
|
||||||
int width, int source_y_fraction) { \
|
ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
|
SIMD_ALIGNED(uint8_t temp[64 * 3]); \
|
||||||
memset(temp, 0, 64 * 2); /* for msan */ \
|
memset(temp, 0, 64 * 2); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
@ -933,7 +953,8 @@ ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
|
|||||||
|
|
||||||
// Any 1 to 2. Outputs UV planes.
|
// Any 1 to 2. Outputs UV planes.
|
||||||
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
|
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
|
||||||
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width) { \
|
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
|
||||||
|
int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
|
SIMD_ALIGNED(uint8_t temp[128 * 3]); \
|
||||||
memset(temp, 0, 128); /* for msan */ \
|
memset(temp, 0, 128); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
@ -984,8 +1005,8 @@ ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
|
|||||||
|
|
||||||
// Any 1 to 3. Outputs RGB planes.
|
// Any 1 to 3. Outputs RGB planes.
|
||||||
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
|
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
|
||||||
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, \
|
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
|
||||||
int width) { \
|
uint8_t* dst_b, int width) { \
|
||||||
SIMD_ALIGNED(uint8_t temp[16 * 6]); \
|
SIMD_ALIGNED(uint8_t temp[16 * 6]); \
|
||||||
memset(temp, 0, 16 * 3); /* for msan */ \
|
memset(temp, 0, 16 * 3); /* for msan */ \
|
||||||
int r = width & MASK; \
|
int r = width & MASK; \
|
||||||
|
|||||||
@ -10,8 +10,8 @@
|
|||||||
|
|
||||||
#include "libyuv/row.h"
|
#include "libyuv/row.h"
|
||||||
|
|
||||||
#include <string.h> // For memcpy and memset.
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string.h> // For memcpy and memset.
|
||||||
|
|
||||||
#include "libyuv/basic_types.h"
|
#include "libyuv/basic_types.h"
|
||||||
|
|
||||||
@ -125,7 +125,9 @@ void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) {
|
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
for (x = 0; x < width; ++x) {
|
for (x = 0; x < width; ++x) {
|
||||||
uint8_t b = src_rgb565[0] & 0x1f;
|
uint8_t b = src_rgb565[0] & 0x1f;
|
||||||
@ -315,8 +317,8 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
|
|||||||
uint8_t g1 = src_argb[5] >> 4;
|
uint8_t g1 = src_argb[5] >> 4;
|
||||||
uint8_t r1 = src_argb[6] >> 4;
|
uint8_t r1 = src_argb[6] >> 4;
|
||||||
uint8_t a1 = src_argb[7] >> 4;
|
uint8_t a1 = src_argb[7] >> 4;
|
||||||
*(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | (b1 << 16) |
|
*(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
|
||||||
(g1 << 20) | (r1 << 24) | (a1 << 28);
|
(b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
|
||||||
dst_rgb += 4;
|
dst_rgb += 4;
|
||||||
src_argb += 8;
|
src_argb += 8;
|
||||||
}
|
}
|
||||||
@ -756,7 +758,9 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Apply color table to a row of image.
|
// Apply color table to a row of image.
|
||||||
void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width) {
|
void ARGBColorTableRow_C(uint8_t* dst_argb,
|
||||||
|
const uint8_t* table_argb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
for (x = 0; x < width; ++x) {
|
for (x = 0; x < width; ++x) {
|
||||||
int b = dst_argb[0];
|
int b = dst_argb[0];
|
||||||
@ -772,7 +776,9 @@ void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Apply color table to a row of image.
|
// Apply color table to a row of image.
|
||||||
void RGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width) {
|
void RGBColorTableRow_C(uint8_t* dst_argb,
|
||||||
|
const uint8_t* table_argb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
for (x = 0; x < width; ++x) {
|
for (x = 0; x < width; ++x) {
|
||||||
int b = dst_argb[0];
|
int b = dst_argb[0];
|
||||||
@ -1535,10 +1541,7 @@ void I210ToARGBRow_C(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void StoreAR30(uint8_t* rgb_buf,
|
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
|
||||||
int b,
|
|
||||||
int g,
|
|
||||||
int r) {
|
|
||||||
uint32_t ar30;
|
uint32_t ar30;
|
||||||
b = b >> 4; // convert 10.6 to 10 bit.
|
b = b >> 4; // convert 10.6 to 10 bit.
|
||||||
g = g >> 4;
|
g = g >> 4;
|
||||||
@ -1577,7 +1580,6 @@ void I210ToAR30Row_C(const uint16_t* src_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// 8 bit YUV to 10 bit AR30
|
// 8 bit YUV to 10 bit AR30
|
||||||
// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
|
// Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
|
||||||
void I422ToAR30Row_C(const uint8_t* src_y,
|
void I422ToAR30Row_C(const uint8_t* src_y,
|
||||||
@ -1954,7 +1956,10 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) {
|
void MirrorUVRow_C(const uint8_t* src_uv,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
src_uv += (width - 1) << 1;
|
src_uv += (width - 1) << 1;
|
||||||
for (x = 0; x < width - 1; x += 2) {
|
for (x = 0; x < width - 1; x += 2) {
|
||||||
@ -1985,7 +1990,10 @@ void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) {
|
void SplitUVRow_C(const uint8_t* src_uv,
|
||||||
|
uint8_t* dst_u,
|
||||||
|
uint8_t* dst_v,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
for (x = 0; x < width - 1; x += 2) {
|
for (x = 0; x < width - 1; x += 2) {
|
||||||
dst_u[x] = src_uv[0];
|
dst_u[x] = src_uv[0];
|
||||||
@ -2385,7 +2393,9 @@ const uint32_t fixed_invtbl8[256] = {
|
|||||||
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
|
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
|
||||||
#undef T
|
#undef T
|
||||||
|
|
||||||
void ARGBUnattenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < width; ++i) {
|
for (i = 0; i < width; ++i) {
|
||||||
uint32_t b = src_argb[0];
|
uint32_t b = src_argb[0];
|
||||||
@ -2673,7 +2683,10 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb,
|
|||||||
// simply extract the low bits of the exponent and the high
|
// simply extract the low bits of the exponent and the high
|
||||||
// bits of the mantissa from our float and we're done.
|
// bits of the mantissa from our float and we're done.
|
||||||
|
|
||||||
void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width) {
|
void HalfFloatRow_C(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
float mult = 1.9259299444e-34f * scale;
|
float mult = 1.9259299444e-34f * scale;
|
||||||
for (i = 0; i < width; ++i) {
|
for (i = 0; i < width; ++i) {
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -1014,7 +1014,9 @@ void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
|
void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, dst0;
|
v16u8 src0, src1, dst0;
|
||||||
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
||||||
@ -1054,7 +1056,9 @@ void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
|
void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1;
|
v16u8 src0, src1;
|
||||||
v16u8 vec0, vec1;
|
v16u8 vec0, vec1;
|
||||||
@ -1230,7 +1234,9 @@ void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, dst0, dst1;
|
v16u8 src0, src1, dst0, dst1;
|
||||||
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
|
||||||
@ -1547,7 +1553,9 @@ void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) {
|
void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
|
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
|
||||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
||||||
@ -1592,7 +1600,9 @@ void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
|
void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
v16u8 src0, src1, src2;
|
v16u8 src0, src1, src2;
|
||||||
v16u8 vec0, vec1, vec2;
|
v16u8 vec0, vec1, vec2;
|
||||||
@ -1642,7 +1652,9 @@ void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
|
void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width) {
|
||||||
int x;
|
int x;
|
||||||
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
|
v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
|
||||||
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
|
||||||
@ -2969,7 +2981,9 @@ void MergeUVRow_MSA(const uint8_t* src_u,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, uint8_t* dst_a, int width) {
|
void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_a,
|
||||||
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
|
v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
|
||||||
|
|
||||||
@ -3429,7 +3443,10 @@ void SobelYRow_MSA(const uint8_t* src_y0,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HalfFloatRow_MSA(const uint16_t* src, uint16_t* dst, float scale, int width) {
|
void HalfFloatRow_MSA(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width) {
|
||||||
int i;
|
int i;
|
||||||
v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
|
v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
|
||||||
v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
|
||||||
|
|||||||
@ -694,7 +694,9 @@ void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
: "cc", "memory", "r3", "q0");
|
: "cc", "memory", "r3", "q0");
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
|
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vmov.u8 d4, #255 \n" // Alpha
|
"vmov.u8 d4, #255 \n" // Alpha
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -756,7 +758,9 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
|
|||||||
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
"vorr.u8 d2, d1, d5 \n" /* R */ \
|
||||||
"vorr.u8 d1, d4, d6 \n" /* G */
|
"vorr.u8 d1, d4, d6 \n" /* G */
|
||||||
|
|
||||||
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) {
|
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vmov.u8 d3, #255 \n" // Alpha
|
"vmov.u8 d3, #255 \n" // Alpha
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -848,7 +852,9 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb24, int width) {
|
void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
|
"vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB.
|
||||||
@ -1070,7 +1076,9 @@ void I422ToUYVYRow_NEON(const uint8_t* src_y,
|
|||||||
: "cc", "memory", "d0", "d1", "d2", "d3");
|
: "cc", "memory", "d0", "d1", "d2", "d3");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb565, int width) {
|
void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb565,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
|
"vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB.
|
||||||
@ -1166,7 +1174,9 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
|
: "cc", "memory", "q0", "q1", "q2", "q12", "q13");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width) {
|
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_a,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
|
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels
|
||||||
@ -1798,7 +1808,9 @@ void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
|||||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
|
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
|
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
|
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
|
||||||
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
|
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
|
||||||
@ -1822,7 +1834,9 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width)
|
|||||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
|
: "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
|
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
|
"vmov.u8 d24, #13 \n" // B * 0.1016 coefficient
|
||||||
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
|
"vmov.u8 d25, #65 \n" // G * 0.5078 coefficient
|
||||||
@ -2081,7 +2095,9 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Attenuate 8 pixels at a time.
|
// Attenuate 8 pixels at a time.
|
||||||
void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
// Attenuate 8 pixels.
|
// Attenuate 8 pixels.
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -2561,7 +2577,10 @@ void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float, int width) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO(fbarchard): multiply by element.
|
// TODO(fbarchard): multiply by element.
|
||||||
void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width) {
|
void HalfFloatRow_NEON(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vdup.32 q0, %3 \n"
|
"vdup.32 q0, %3 \n"
|
||||||
|
|
||||||
|
|||||||
@ -733,7 +733,9 @@ void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
: "cc", "memory", "v0");
|
: "cc", "memory", "v0");
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
|
void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v4.8b, #255 \n" // Alpha
|
"movi v4.8b, #255 \n" // Alpha
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -797,7 +799,9 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
|
|||||||
"orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
|
"orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \
|
||||||
"dup v2.2D, v0.D[1] \n" /* R */
|
"dup v2.2D, v0.D[1] \n" /* R */
|
||||||
|
|
||||||
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) {
|
void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v3.8b, #255 \n" // Alpha
|
"movi v3.8b, #255 \n" // Alpha
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -902,7 +906,9 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb24, int width) {
|
void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb24,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
|
"ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB
|
||||||
@ -1126,7 +1132,9 @@ void I422ToUYVYRow_NEON(const uint8_t* src_y,
|
|||||||
: "cc", "memory", "v0", "v1", "v2", "v3");
|
: "cc", "memory", "v0", "v1", "v2", "v3");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb565, int width) {
|
void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_rgb565,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
|
"ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels
|
||||||
@ -1223,7 +1231,9 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width) {
|
void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_a,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16
|
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16
|
||||||
@ -1829,7 +1839,9 @@ void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
|
|||||||
"v27");
|
"v27");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
|
void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
|
"movi v4.8b, #13 \n" // B * 0.1016 coefficient
|
||||||
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
|
"movi v5.8b, #65 \n" // G * 0.5078 coefficient
|
||||||
@ -1853,7 +1865,9 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width)
|
|||||||
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
|
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
|
void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
|
||||||
|
uint8_t* dst_y,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movi v24.8b, #13 \n" // B * 0.1016 coefficient
|
"movi v24.8b, #13 \n" // B * 0.1016 coefficient
|
||||||
"movi v25.8b, #65 \n" // G * 0.5078 coefficient
|
"movi v25.8b, #65 \n" // G * 0.5078 coefficient
|
||||||
@ -2121,7 +2135,9 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Attenuate 8 pixels at a time.
|
// Attenuate 8 pixels at a time.
|
||||||
void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
|
void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
|
||||||
|
uint8_t* dst_argb,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
// Attenuate 8 pixels.
|
// Attenuate 8 pixels.
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -2604,7 +2620,10 @@ void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float, int width) {
|
|||||||
: "cc", "memory", "v1", "v2", "v3");
|
: "cc", "memory", "v1", "v2", "v3");
|
||||||
}
|
}
|
||||||
|
|
||||||
void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width) {
|
void HalfFloatRow_NEON(const uint16_t* src,
|
||||||
|
uint16_t* dst,
|
||||||
|
float scale,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
|
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
|
||||||
|
|||||||
@ -3022,7 +3022,9 @@ __declspec(naked) void MirrorRow_SSSE3(const uint8_t* src,
|
|||||||
#endif // HAS_MIRRORROW_SSSE3
|
#endif // HAS_MIRRORROW_SSSE3
|
||||||
|
|
||||||
#ifdef HAS_MIRRORROW_AVX2
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
__declspec(naked) void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
__declspec(naked) void MirrorRow_AVX2(const uint8_t* src,
|
||||||
|
uint8_t* dst,
|
||||||
|
int width) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src
|
mov eax, [esp + 4] // src
|
||||||
mov edx, [esp + 8] // dst
|
mov edx, [esp + 8] // dst
|
||||||
@ -3274,7 +3276,9 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
|
|||||||
|
|
||||||
#ifdef HAS_COPYROW_SSE2
|
#ifdef HAS_COPYROW_SSE2
|
||||||
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
|
// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time.
|
||||||
__declspec(naked) void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int count) {
|
__declspec(naked) void CopyRow_SSE2(const uint8_t* src,
|
||||||
|
uint8_t* dst,
|
||||||
|
int count) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src
|
mov eax, [esp + 4] // src
|
||||||
mov edx, [esp + 8] // dst
|
mov edx, [esp + 8] // dst
|
||||||
@ -3311,7 +3315,9 @@ __declspec(naked) void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int count)
|
|||||||
|
|
||||||
#ifdef HAS_COPYROW_AVX
|
#ifdef HAS_COPYROW_AVX
|
||||||
// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time.
|
// CopyRow copys 'count' bytes using a 32 byte load/store, 64 bytes at time.
|
||||||
__declspec(naked) void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int count) {
|
__declspec(naked) void CopyRow_AVX(const uint8_t* src,
|
||||||
|
uint8_t* dst,
|
||||||
|
int count) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, [esp + 4] // src
|
mov eax, [esp + 4] // src
|
||||||
mov edx, [esp + 8] // dst
|
mov edx, [esp + 8] // dst
|
||||||
@ -3334,7 +3340,9 @@ __declspec(naked) void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int count)
|
|||||||
#endif // HAS_COPYROW_AVX
|
#endif // HAS_COPYROW_AVX
|
||||||
|
|
||||||
// Multiple of 1.
|
// Multiple of 1.
|
||||||
__declspec(naked) void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int count) {
|
__declspec(naked) void CopyRow_ERMS(const uint8_t* src,
|
||||||
|
uint8_t* dst,
|
||||||
|
int count) {
|
||||||
__asm {
|
__asm {
|
||||||
mov eax, esi
|
mov eax, esi
|
||||||
mov edx, edi
|
mov edx, edi
|
||||||
@ -3582,7 +3590,9 @@ __declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int count) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Write 'count' 32 bit values.
|
// Write 'count' 32 bit values.
|
||||||
__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int count) {
|
__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb,
|
||||||
|
uint32_t v32,
|
||||||
|
int count) {
|
||||||
__asm {
|
__asm {
|
||||||
mov edx, edi
|
mov edx, edi
|
||||||
mov edi, [esp + 4] // dst
|
mov edi, [esp + 4] // dst
|
||||||
|
|||||||
@ -815,8 +815,8 @@ static void ScalePlaneBox(int src_width,
|
|||||||
const uint16_t* src_ptr, uint8_t* dst_ptr) =
|
const uint16_t* src_ptr, uint8_t* dst_ptr) =
|
||||||
(dx & 0xffff) ? ScaleAddCols2_C
|
(dx & 0xffff) ? ScaleAddCols2_C
|
||||||
: ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
|
: ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
|
||||||
void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) =
|
void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
|
||||||
ScaleAddRow_C;
|
int src_width) = ScaleAddRow_C;
|
||||||
#if defined(HAS_SCALEADDROW_SSE2)
|
#if defined(HAS_SCALEADDROW_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||||
ScaleAddRow = ScaleAddRow_Any_SSE2;
|
ScaleAddRow = ScaleAddRow_Any_SSE2;
|
||||||
@ -895,8 +895,8 @@ static void ScalePlaneBox_16(int src_width,
|
|||||||
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
|
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
|
||||||
const uint32_t* src_ptr, uint16_t* dst_ptr) =
|
const uint32_t* src_ptr, uint16_t* dst_ptr) =
|
||||||
(dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
|
(dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
|
||||||
void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width) =
|
void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
|
||||||
ScaleAddRow_16_C;
|
int src_width) = ScaleAddRow_16_C;
|
||||||
|
|
||||||
#if defined(HAS_SCALEADDROW_16_SSE2)
|
#if defined(HAS_SCALEADDROW_16_SSE2)
|
||||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
|
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
|
||||||
@ -946,8 +946,8 @@ void ScalePlaneBilinearDown(int src_width,
|
|||||||
|
|
||||||
const int max_y = (src_height - 1) << 16;
|
const int max_y = (src_height - 1) << 16;
|
||||||
int j;
|
int j;
|
||||||
void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
|
void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||||
int x, int dx) =
|
int dst_width, int x, int dx) =
|
||||||
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
|
(src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
|
||||||
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride, int dst_width,
|
ptrdiff_t src_stride, int dst_width,
|
||||||
@ -1144,8 +1144,8 @@ void ScalePlaneBilinearUp(int src_width,
|
|||||||
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride, int dst_width,
|
ptrdiff_t src_stride, int dst_width,
|
||||||
int source_y_fraction) = InterpolateRow_C;
|
int source_y_fraction) = InterpolateRow_C;
|
||||||
void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
|
void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
|
||||||
int x, int dx) =
|
int dst_width, int x, int dx) =
|
||||||
filtering ? ScaleFilterCols_C : ScaleCols_C;
|
filtering ? ScaleFilterCols_C : ScaleCols_C;
|
||||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
|
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
|
||||||
&dx, &dy);
|
&dx, &dy);
|
||||||
@ -1401,8 +1401,8 @@ static void ScalePlaneSimple(int src_width,
|
|||||||
const uint8_t* src_ptr,
|
const uint8_t* src_ptr,
|
||||||
uint8_t* dst_ptr) {
|
uint8_t* dst_ptr) {
|
||||||
int i;
|
int i;
|
||||||
void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, int x,
|
void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
|
||||||
int dx) = ScaleCols_C;
|
int x, int dx) = ScaleCols_C;
|
||||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||||
int x = 0;
|
int x = 0;
|
||||||
int y = 0;
|
int y = 0;
|
||||||
@ -1759,7 +1759,8 @@ int ScaleOffset(const uint8_t* src,
|
|||||||
uint8_t* dst_y = dst + dst_yoffset_even * dst_width;
|
uint8_t* dst_y = dst + dst_yoffset_even * dst_width;
|
||||||
uint8_t* dst_u =
|
uint8_t* dst_u =
|
||||||
dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth;
|
dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth;
|
||||||
uint8_t* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
|
uint8_t* dst_v = dst + dst_width * dst_height +
|
||||||
|
dst_halfwidth * dst_halfheight +
|
||||||
(dst_yoffset_even >> 1) * dst_halfwidth;
|
(dst_yoffset_even >> 1) * dst_halfwidth;
|
||||||
if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 ||
|
if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 ||
|
||||||
dst_height <= 0 || dst_yoffset_even < 0 ||
|
dst_height <= 0 || dst_yoffset_even < 0 ||
|
||||||
|
|||||||
@ -772,8 +772,8 @@ static void ScaleARGBSimple(int src_width,
|
|||||||
int y,
|
int y,
|
||||||
int dy) {
|
int dy) {
|
||||||
int j;
|
int j;
|
||||||
void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width,
|
void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||||
int x, int dx) =
|
int dst_width, int x, int dx) =
|
||||||
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
|
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
|
||||||
(void)src_height;
|
(void)src_height;
|
||||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||||
|
|||||||
@ -758,7 +758,9 @@ void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleAddRow_16_C(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width) {
|
void ScaleAddRow_16_C(const uint16_t* src_ptr,
|
||||||
|
uint32_t* dst_ptr,
|
||||||
|
int src_width) {
|
||||||
int x;
|
int x;
|
||||||
assert(src_width > 0);
|
assert(src_width > 0);
|
||||||
for (x = 0; x < src_width - 1; x += 2) {
|
for (x = 0; x < src_width - 1; x += 2) {
|
||||||
|
|||||||
@ -99,6 +99,7 @@ void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
|
|||||||
int dst_width) {
|
int dst_width) {
|
||||||
(void)src_stride;
|
(void)src_stride;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
// 16 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -114,8 +115,8 @@ void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -146,8 +147,8 @@ void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -186,9 +187,7 @@ void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)) // %3
|
: "r"((intptr_t)(src_stride)) // %3
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN2_AVX2
|
#ifdef HAS_SCALEROWDOWN2_AVX2
|
||||||
@ -198,6 +197,7 @@ void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
|
|||||||
int dst_width) {
|
int dst_width) {
|
||||||
(void)src_stride;
|
(void)src_stride;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu (%0),%%ymm0 \n"
|
"vmovdqu (%0),%%ymm0 \n"
|
||||||
@ -215,8 +215,8 @@ void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
|
void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
|
||||||
@ -249,8 +249,8 @@ void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
|
void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
|
||||||
@ -291,9 +291,7 @@ void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)) // %3
|
: "r"((intptr_t)(src_stride)) // %3
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
#endif // HAS_SCALEROWDOWN2_AVX2
|
#endif // HAS_SCALEROWDOWN2_AVX2
|
||||||
|
|
||||||
@ -324,8 +322,8 @@ void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1", "xmm5"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1", "xmm5");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -379,9 +377,7 @@ void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_width), // %2
|
"+r"(dst_width), // %2
|
||||||
"=&r"(stridex3) // %3
|
"=&r"(stridex3) // %3
|
||||||
: "r"((intptr_t)(src_stride)) // %4
|
: "r"((intptr_t)(src_stride)) // %4
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_SCALEROWDOWN4_AVX2
|
#ifdef HAS_SCALEROWDOWN4_AVX2
|
||||||
@ -394,6 +390,7 @@ void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
|
|||||||
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
"vpsrld $0x18,%%ymm5,%%ymm5 \n"
|
"vpsrld $0x18,%%ymm5,%%ymm5 \n"
|
||||||
"vpslld $0x10,%%ymm5,%%ymm5 \n"
|
"vpslld $0x10,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu (%0),%%ymm0 \n"
|
"vmovdqu (%0),%%ymm0 \n"
|
||||||
@ -414,8 +411,8 @@ void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1", "xmm5"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1", "xmm5");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
|
void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
|
||||||
@ -469,9 +466,7 @@ void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)), // %3
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
"r"((intptr_t)(src_stride * 3)) // %4
|
"r"((intptr_t)(src_stride * 3)) // %4
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
#endif // HAS_SCALEROWDOWN4_AVX2
|
#endif // HAS_SCALEROWDOWN4_AVX2
|
||||||
|
|
||||||
@ -490,6 +485,7 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
|
|||||||
"m"(kShuf2) // %2
|
"m"(kShuf2) // %2
|
||||||
);
|
);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -509,8 +505,8 @@ void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
|
|||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -536,6 +532,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"m"(kRound34) // %2
|
"m"(kRound34) // %2
|
||||||
);
|
);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm6 \n"
|
"movdqu (%0),%%xmm6 \n"
|
||||||
@ -574,9 +571,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)), // %3
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
"m"(kMadd21) // %4
|
"m"(kMadd21) // %4
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
"xmm7");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -603,6 +599,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
);
|
);
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm6 \n"
|
"movdqu (%0),%%xmm6 \n"
|
||||||
@ -644,9 +641,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)), // %3
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
"m"(kMadd21) // %4
|
"m"(kMadd21) // %4
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
"xmm7");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -677,8 +673,7 @@ void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "m"(kShuf38a), // %3
|
: "m"(kShuf38a), // %3
|
||||||
"m"(kShuf38b) // %4
|
"m"(kShuf38b) // %4
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
|
: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -697,6 +692,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"m"(kScaleAb2) // %3
|
"m"(kScaleAb2) // %3
|
||||||
);
|
);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -722,9 +718,7 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)) // %3
|
: "r"((intptr_t)(src_stride)) // %3
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
|
||||||
@ -742,6 +736,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"m"(kScaleAc33) // %2
|
"m"(kScaleAc33) // %2
|
||||||
);
|
);
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -786,16 +781,19 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)) // %3
|
: "r"((intptr_t)(src_stride)) // %3
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
"xmm7");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads 16xN bytes and produces 16 shorts at a time.
|
// Reads 16xN bytes and produces 16 shorts at a time.
|
||||||
void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
|
void ScaleAddRow_SSE2(const uint8_t* src_ptr,
|
||||||
|
uint16_t* dst_ptr,
|
||||||
|
int src_width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
"pxor %%xmm5,%%xmm5 \n"
|
"pxor %%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
|
// 16 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm3 \n"
|
"movdqu (%0),%%xmm3 \n"
|
||||||
@ -816,14 +814,16 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width)
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(src_width) // %2
|
"+r"(src_width) // %2
|
||||||
:
|
:
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_SCALEADDROW_AVX2
|
#ifdef HAS_SCALEADDROW_AVX2
|
||||||
// Reads 32 bytes and accumulates to 32 shorts at a time.
|
// Reads 32 bytes and accumulates to 32 shorts at a time.
|
||||||
void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
|
void ScaleAddRow_AVX2(const uint8_t* src_ptr,
|
||||||
|
uint16_t* dst_ptr,
|
||||||
|
int src_width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
@ -845,8 +845,7 @@ void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width)
|
|||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(src_width) // %2
|
"+r"(src_width) // %2
|
||||||
:
|
:
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
#endif // HAS_SCALEADDROW_AVX2
|
#endif // HAS_SCALEADDROW_AVX2
|
||||||
|
|
||||||
@ -898,7 +897,8 @@ void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
|
|||||||
"pshufb %%xmm5,%%xmm1 \n"
|
"pshufb %%xmm5,%%xmm1 \n"
|
||||||
"punpcklwd %%xmm4,%%xmm0 \n"
|
"punpcklwd %%xmm4,%%xmm0 \n"
|
||||||
"psubb %8,%%xmm0 \n" // make pixels signed.
|
"psubb %8,%%xmm0 \n" // make pixels signed.
|
||||||
"pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) + 1
|
"pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) +
|
||||||
|
// 1
|
||||||
"paddusb %%xmm7,%%xmm1 \n"
|
"paddusb %%xmm7,%%xmm1 \n"
|
||||||
"pmaddubsw %%xmm0,%%xmm1 \n"
|
"pmaddubsw %%xmm0,%%xmm1 \n"
|
||||||
"pextrw $0x1,%%xmm2,%k3 \n"
|
"pextrw $0x1,%%xmm2,%k3 \n"
|
||||||
@ -949,9 +949,8 @@ void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
|
|||||||
"m"(kFsub80), // %8
|
"m"(kFsub80), // %8
|
||||||
"m"(kFadd40) // %9
|
"m"(kFadd40) // %9
|
||||||
#endif
|
#endif
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
"xmm7");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
||||||
@ -964,6 +963,7 @@ void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
|
|||||||
(void)x;
|
(void)x;
|
||||||
(void)dx;
|
(void)dx;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%1),%%xmm0 \n"
|
"movdqu (%1),%%xmm0 \n"
|
||||||
@ -980,8 +980,8 @@ void ScaleColsUp2_SSE2(uint8_t* dst_ptr,
|
|||||||
: "+r"(dst_ptr), // %0
|
: "+r"(dst_ptr), // %0
|
||||||
"+r"(src_ptr), // %1
|
"+r"(src_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
|
void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
|
||||||
@ -990,6 +990,7 @@ void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
|
|||||||
int dst_width) {
|
int dst_width) {
|
||||||
(void)src_stride;
|
(void)src_stride;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -1003,8 +1004,8 @@ void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb,
|
|||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
|
void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
|
||||||
@ -1013,6 +1014,7 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
|
|||||||
int dst_width) {
|
int dst_width) {
|
||||||
(void)src_stride;
|
(void)src_stride;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -1029,8 +1031,8 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
|
|||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc", "xmm0", "xmm1"
|
::"memory",
|
||||||
);
|
"cc", "xmm0", "xmm1");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
|
void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
|
||||||
@ -1038,6 +1040,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int dst_width) {
|
int dst_width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%0),%%xmm0 \n"
|
"movdqu (%0),%%xmm0 \n"
|
||||||
@ -1059,9 +1062,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb,
|
|||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)) // %3
|
: "r"((intptr_t)(src_stride)) // %3
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads 4 pixels at a time.
|
// Reads 4 pixels at a time.
|
||||||
@ -1077,6 +1078,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
|
|||||||
asm volatile(
|
asm volatile(
|
||||||
"lea 0x00(,%1,4),%1 \n"
|
"lea 0x00(,%1,4),%1 \n"
|
||||||
"lea 0x00(%1,%1,2),%4 \n"
|
"lea 0x00(%1,%1,2),%4 \n"
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movd (%0),%%xmm0 \n"
|
"movd (%0),%%xmm0 \n"
|
||||||
@ -1096,9 +1098,8 @@ void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb,
|
|||||||
"+r"(dst_argb), // %2
|
"+r"(dst_argb), // %2
|
||||||
"+r"(dst_width), // %3
|
"+r"(dst_width), // %3
|
||||||
"=&r"(src_stepx_x12) // %4
|
"=&r"(src_stepx_x12) // %4
|
||||||
:: "memory", "cc",
|
::"memory",
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3"
|
"cc", "xmm0", "xmm1", "xmm2", "xmm3");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Blends four 2x2 to 4x1.
|
// Blends four 2x2 to 4x1.
|
||||||
@ -1144,9 +1145,8 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb,
|
|||||||
"+rm"(dst_width), // %3
|
"+rm"(dst_width), // %3
|
||||||
"=&r"(src_stepx_x12), // %4
|
"=&r"(src_stepx_x12), // %4
|
||||||
"+r"(row1) // %5
|
"+r"(row1) // %5
|
||||||
:: "memory", "cc",
|
::"memory",
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3"
|
"cc", "xmm0", "xmm1", "xmm2", "xmm3");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScaleARGBCols_SSE2(uint8_t* dst_argb,
|
void ScaleARGBCols_SSE2(uint8_t* dst_argb,
|
||||||
@ -1214,9 +1214,7 @@ void ScaleARGBCols_SSE2(uint8_t* dst_argb,
|
|||||||
"+r"(dst_width) // %4
|
"+r"(dst_width) // %4
|
||||||
: "rm"(x), // %5
|
: "rm"(x), // %5
|
||||||
"rm"(dx) // %6
|
"rm"(dx) // %6
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
// Reads 4 pixels, duplicates them and writes 8 pixels.
|
||||||
@ -1229,6 +1227,7 @@ void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
|
|||||||
(void)x;
|
(void)x;
|
||||||
(void)dx;
|
(void)dx;
|
||||||
asm volatile(
|
asm volatile(
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"movdqu (%1),%%xmm0 \n"
|
"movdqu (%1),%%xmm0 \n"
|
||||||
@ -1245,9 +1244,8 @@ void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
|
|||||||
: "+r"(dst_argb), // %0
|
: "+r"(dst_argb), // %0
|
||||||
"+r"(src_argb), // %1
|
"+r"(src_argb), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
:: "memory", "cc",
|
::"memory",
|
||||||
"xmm0", "xmm1"
|
"cc", "xmm0", "xmm1");
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
|
// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
|
||||||
@ -1325,8 +1323,7 @@ void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
|
|||||||
"packuswb %%xmm0,%%xmm0 \n"
|
"packuswb %%xmm0,%%xmm0 \n"
|
||||||
"movd %%xmm0,(%0) \n"
|
"movd %%xmm0,(%0) \n"
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN "99: \n" // clang-format error.
|
||||||
"99: \n"
|
|
||||||
: "+r"(dst_argb), // %0
|
: "+r"(dst_argb), // %0
|
||||||
"+r"(src_argb), // %1
|
"+r"(src_argb), // %1
|
||||||
"+rm"(dst_width), // %2
|
"+rm"(dst_width), // %2
|
||||||
@ -1334,9 +1331,7 @@ void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
|
|||||||
"=&r"(x1) // %4
|
"=&r"(x1) // %4
|
||||||
: "rm"(x), // %5
|
: "rm"(x), // %5
|
||||||
"rm"(dx) // %6
|
"rm"(dx) // %6
|
||||||
: "memory", "cc",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
||||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Divide num by div and return as 16.16 fixed point result.
|
// Divide num by div and return as 16.16 fixed point result.
|
||||||
|
|||||||
@ -15,14 +15,13 @@ namespace libyuv {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
|
|
||||||
|
|
||||||
struct FourCCAliasEntry {
|
struct FourCCAliasEntry {
|
||||||
uint32_t alias;
|
uint32_t alias;
|
||||||
uint32_t canonical;
|
uint32_t canonical;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct FourCCAliasEntry kFourCCAliases[] = {
|
#define NUM_ALIASES 18
|
||||||
|
static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
|
||||||
{FOURCC_IYUV, FOURCC_I420},
|
{FOURCC_IYUV, FOURCC_I420},
|
||||||
{FOURCC_YU12, FOURCC_I420},
|
{FOURCC_YU12, FOURCC_I420},
|
||||||
{FOURCC_YU16, FOURCC_I422},
|
{FOURCC_YU16, FOURCC_I422},
|
||||||
@ -48,7 +47,7 @@ static const struct FourCCAliasEntry kFourCCAliases[] = {
|
|||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
uint32_t CanonicalFourCC(uint32_t fourcc) {
|
uint32_t CanonicalFourCC(uint32_t fourcc) {
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
|
for (i = 0; i < NUM_ALIASES; ++i) {
|
||||||
if (kFourCCAliases[i].alias == fourcc) {
|
if (kFourCCAliases[i].alias == fourcc) {
|
||||||
return kFourCCAliases[i].canonical;
|
return kFourCCAliases[i].canonical;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,7 +22,9 @@
|
|||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
|
|
||||||
// hash seed of 5381 recommended.
|
// hash seed of 5381 recommended.
|
||||||
static uint32_t ReferenceHashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
|
static uint32_t ReferenceHashDjb2(const uint8_t* src,
|
||||||
|
uint64_t count,
|
||||||
|
uint32_t seed) {
|
||||||
uint32_t hash = seed;
|
uint32_t hash = seed;
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
do {
|
do {
|
||||||
|
|||||||
@ -2040,7 +2040,8 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
|
|||||||
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
|
memset(dst_argb_c + DOFF, 1, kStrideB * kHeight); \
|
||||||
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
|
memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight); \
|
||||||
MaskCpuFlags(disable_cpu_flags_); \
|
MaskCpuFlags(disable_cpu_flags_); \
|
||||||
FMT_PLANAR##To##FMT_B(reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
|
FMT_PLANAR##To##FMT_B( \
|
||||||
|
reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth, \
|
||||||
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
|
reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV, \
|
||||||
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
|
reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV, \
|
||||||
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
|
dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight); \
|
||||||
|
|||||||
@ -2061,8 +2061,8 @@ int TestHalfFloatPlane(int benchmark_width,
|
|||||||
MaskCpuFlags(disable_cpu_flags);
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
for (j = 0; j < benchmark_iterations; j++) {
|
for (j = 0; j < benchmark_iterations; j++) {
|
||||||
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
|
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
|
||||||
reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2, scale,
|
reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
|
||||||
benchmark_width, benchmark_height);
|
scale, benchmark_width, benchmark_height);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable optimizations.
|
// Enable optimizations.
|
||||||
@ -2075,7 +2075,8 @@ int TestHalfFloatPlane(int benchmark_width,
|
|||||||
|
|
||||||
int max_diff = 0;
|
int max_diff = 0;
|
||||||
for (i = 0; i < y_plane_size / 2; ++i) {
|
for (i = 0; i < y_plane_size / 2; ++i) {
|
||||||
int abs_diff = abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
|
int abs_diff =
|
||||||
|
abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
|
||||||
static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
|
static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
|
||||||
if (abs_diff > max_diff) {
|
if (abs_diff > max_diff) {
|
||||||
max_diff = abs_diff;
|
max_diff = abs_diff;
|
||||||
@ -2788,8 +2789,9 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
|
|||||||
|
|
||||||
MaskCpuFlags(disable_cpu_flags_);
|
MaskCpuFlags(disable_cpu_flags_);
|
||||||
Convert8To16Plane(src_pixels_y, benchmark_width_,
|
Convert8To16Plane(src_pixels_y, benchmark_width_,
|
||||||
reinterpret_cast<uint16_t*>(dst_pixels_y_c), benchmark_width_,
|
reinterpret_cast<uint16_t*>(dst_pixels_y_c),
|
||||||
1024, benchmark_width_, benchmark_height_);
|
benchmark_width_, 1024, benchmark_width_,
|
||||||
|
benchmark_height_);
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info_);
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
@ -3214,8 +3216,9 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
|
|||||||
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_EQ(dst_pixels_c[0], static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 +
|
EXPECT_EQ(dst_pixels_c[0],
|
||||||
640 * 3 * 4 + 640 * 4 * 1));
|
static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
|
||||||
|
640 * 4 * 1));
|
||||||
EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
|
EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -48,7 +48,8 @@ static int ARGBTestFilter(int src_width,
|
|||||||
}
|
}
|
||||||
MemRandomize(src_argb, src_argb_plane_size);
|
MemRandomize(src_argb, src_argb_plane_size);
|
||||||
|
|
||||||
int64_t dst_argb_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
|
int64_t dst_argb_plane_size =
|
||||||
|
(dst_width + b * 2) * (dst_height + b * 2) * 4LL;
|
||||||
int dst_stride_argb = (b * 2 + dst_width) * 4;
|
int dst_stride_argb = (b * 2 + dst_width) * 4;
|
||||||
|
|
||||||
align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
|
align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
|
||||||
@ -310,6 +311,7 @@ TEST_SCALETO(ARGBScale, 1280, 720)
|
|||||||
#undef TEST_SCALETO
|
#undef TEST_SCALETO
|
||||||
|
|
||||||
// Scale with YUV conversion to ARGB and clipping.
|
// Scale with YUV conversion to ARGB and clipping.
|
||||||
|
// TODO(fbarchard): Add fourcc support. All 4 ARGB formats is easy to support.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int YUVToARGBScaleReference2(const uint8_t* src_y,
|
int YUVToARGBScaleReference2(const uint8_t* src_y,
|
||||||
int src_stride_y,
|
int src_stride_y,
|
||||||
@ -317,12 +319,12 @@ int YUVToARGBScaleReference2(const uint8_t* src_y,
|
|||||||
int src_stride_u,
|
int src_stride_u,
|
||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
int src_stride_v,
|
int src_stride_v,
|
||||||
uint32_t /* src_fourcc */, // TODO: Add support.
|
uint32 /* src_fourcc */,
|
||||||
int src_width,
|
int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int dst_stride_argb,
|
int dst_stride_argb,
|
||||||
uint32_t /* dst_fourcc */, // TODO: Add support.
|
uint32 /* dst_fourcc */,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
int dst_height,
|
int dst_height,
|
||||||
int clip_x,
|
int clip_x,
|
||||||
@ -330,7 +332,8 @@ int YUVToARGBScaleReference2(const uint8_t* src_y,
|
|||||||
int clip_width,
|
int clip_width,
|
||||||
int clip_height,
|
int clip_height,
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
uint8_t* argb_buffer = static_cast<uint8_t*>(malloc(src_width * src_height * 4));
|
uint8_t* argb_buffer =
|
||||||
|
static_cast<uint8_t*>(malloc(src_width * src_height * 4));
|
||||||
int r;
|
int r;
|
||||||
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
||||||
argb_buffer, src_width * 4, src_width, src_height);
|
argb_buffer, src_width * 4, src_width, src_height);
|
||||||
@ -342,7 +345,12 @@ int YUVToARGBScaleReference2(const uint8_t* src_y,
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FillRamp(uint8_t* buf, int width, int height, int v, int dx, int dy) {
|
static void FillRamp(uint8_t* buf,
|
||||||
|
int width,
|
||||||
|
int height,
|
||||||
|
int v,
|
||||||
|
int dx,
|
||||||
|
int dy) {
|
||||||
int rv = v;
|
int rv = v;
|
||||||
for (int y = 0; y < height; ++y) {
|
for (int y = 0; y < height; ++y) {
|
||||||
for (int x = 0; x < width; ++x) {
|
for (int x = 0; x < width; ++x) {
|
||||||
|
|||||||
@ -242,8 +242,8 @@ static uint32_t SumSquareError_C(const uint8_t* src_a,
|
|||||||
double ComputeSumSquareError(const uint8_t* src_a,
|
double ComputeSumSquareError(const uint8_t* src_a,
|
||||||
const uint8_t* src_b,
|
const uint8_t* src_b,
|
||||||
int count) {
|
int count) {
|
||||||
uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, int count) =
|
uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
|
||||||
SumSquareError_C;
|
int count) = SumSquareError_C;
|
||||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||||
SumSquareError = SumSquareError_NEON;
|
SumSquareError = SumSquareError_NEON;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user