Lint cleanup after C99 change CL

TBR=braveyao@chromium.org
Bug: libyuv:774
Test: git cl lint
Change-Id: I51cf8107a8db17fbc9952d610f3e4d7aac5aa743
Reviewed-on: https://chromium-review.googlesource.com/882217
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Frank Barchard <fbarchard@chromium.org>, 2018-01-24 10:52:17 -08:00 (author and committer)
parent f1c5345046
commit 92e22cf5b6
44 changed files with 5881 additions and 5625 deletions
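The CL is mechanical: `git cl lint` flags declarations that run past the 80-column style limit, and the fix wraps each parameter onto its own line, aligned under the opening parenthesis (most likely applied with clang-format; that tool choice is an assumption, the commit message only names the lint check). A representative before/after, taken from one of the declarations below:

// Before: whole parameter list on one over-long line.
uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height);

// After: one parameter per line, aligned under the opening parenthesis.
uint32_t ARGBDetect(const uint8_t* argb,
                    int stride_argb,
                    int width,
                    int height);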

View File

@@ -25,25 +25,30 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed);
 // Hamming Distance
 LIBYUV_API
 uint64_t ComputeHammingDistance(const uint8_t* src_a,
                                 const uint8_t* src_b,
                                 int count);

 // Scan an opaque argb image and return fourcc based on alpha offset.
 // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
 LIBYUV_API
-uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height);
+uint32_t ARGBDetect(const uint8_t* argb,
+                    int stride_argb,
+                    int width,
+                    int height);

 // Sum Square Error - used to compute Mean Square Error or PSNR.
 LIBYUV_API
-uint64_t ComputeSumSquareError(const uint8_t* src_a, const uint8_t* src_b, int count);
+uint64_t ComputeSumSquareError(const uint8_t* src_a,
+                               const uint8_t* src_b,
+                               int count);

 LIBYUV_API
 uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
                                     int stride_a,
                                     const uint8_t* src_b,
                                     int stride_b,
                                     int width,
                                     int height);

 static const int kMaxPsnr = 128;
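As the comment above says, the summed square error feeds straight into MSE/PSNR. A minimal sketch of that conversion (the cap and the zero-error case follow from kMaxPsnr above; the library does this in its SumSquareErrorToPsnr helper, used later in compare.cc, whose exact rounding is not reproduced here):

#include <math.h>

// Sketch: PSNR from a sum of squared byte errors over `samples` pixels.
// psnr = 10 * log10(255^2 / mse), with sse == 0 reported as kMaxPsnr.
static double PsnrFromSse(uint64_t sse, uint64_t samples) {
  if (sse == 0) {
    return 128.0;  // kMaxPsnr: conventional answer for identical planes.
  }
  const double mse = (double)sse / (double)samples;
  return 10.0 * log10(255.0 * 255.0 / mse);
}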

View File

@@ -90,18 +90,40 @@ extern "C" {
 #define HAS_SUMSQUAREERROR_MSA
 #endif

-uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t HammingDistance_SSE42(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t HammingDistance_SSSE3(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int count);
+uint32_t HammingDistance_C(const uint8_t* src_a,
+                           const uint8_t* src_b,
+                           int count);
+uint32_t HammingDistance_SSE42(const uint8_t* src_a,
+                               const uint8_t* src_b,
+                               int count);
+uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
+                               const uint8_t* src_b,
+                               int count);
+uint32_t HammingDistance_AVX2(const uint8_t* src_a,
+                              const uint8_t* src_b,
+                              int count);
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+                              const uint8_t* src_b,
+                              int count);
+uint32_t HammingDistance_MSA(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count);
-uint32_t SumSquareError_C(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count);
-uint32_t SumSquareError_MSA(const uint8_t* src_a, const uint8_t* src_b, int count);
+uint32_t SumSquareError_C(const uint8_t* src_a,
+                          const uint8_t* src_b,
+                          int count);
+uint32_t SumSquareError_SSE2(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count);
+uint32_t SumSquareError_AVX2(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count);
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count);
+uint32_t SumSquareError_MSA(const uint8_t* src_a,
+                            const uint8_t* src_b,
+                            int count);

 uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
 uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);

View File

@@ -352,7 +352,10 @@ int MJPGToI420(const uint8_t* sample,
 // Query size of MJPG in pixels.
 LIBYUV_API
-int MJPGSize(const uint8_t* sample, size_t sample_size, int* width, int* height);
+int MJPGSize(const uint8_t* sample,
+             size_t sample_size,
+             int* width,
+             int* height);
 #endif

 // Convert camera sample to I420 with cropping, rotation and vertical flip.
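Typical use is to probe the JPEG headers before allocating the I420 planes. A hedged sketch (error handling elided; `mjpg` and `mjpg_size` are hypothetical inputs, and the 0-on-success return follows the library's usual convention):

int width = 0;
int height = 0;
// MJPGSize parses only the headers of the compressed frame.
if (MJPGSize(mjpg, mjpg_size, &width, &height) == 0) {
  // Allocate a width x height luma plane plus two quarter-size chroma
  // planes, then pass the same buffer to MJPGToI420.
}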

View File

@@ -16,38 +16,38 @@
 #include <stdint.h>

 #if (__mips_isa_rev >= 6)
 #define LW(psrc)                                        \
   ({                                                    \
     uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
     uint32_t val_m;                                     \
     asm volatile("lw %[val_m], %[psrc_lw_m] \n"         \
                  : [val_m] "=r"(val_m)                  \
                  : [psrc_lw_m] "m"(*psrc_lw_m));        \
     val_m;                                              \
   })

 #if (__mips == 64)
 #define LD(psrc)                                        \
   ({                                                    \
     uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
     uint64_t val_m = 0;                                 \
     asm volatile("ld %[val_m], %[psrc_ld_m] \n"         \
                  : [val_m] "=r"(val_m)                  \
                  : [psrc_ld_m] "m"(*psrc_ld_m));        \
     val_m;                                              \
   })
 #else  // !(__mips == 64)
 #define LD(psrc)                                                         \
   ({                                                                     \
     uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */                  \
     uint32_t val0_m, val1_m;                                             \
     uint64_t val_m = 0;                                                  \
     val0_m = LW(psrc_ld_m);                                              \
     val1_m = LW(psrc_ld_m + 4);                                          \
     val_m = (uint64_t)(val1_m); /* NOLINT */                             \
     val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
     val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */           \
     val_m;                                                               \
   })
 #endif  // (__mips == 64)
@@ -81,38 +81,38 @@
   })
 #endif  // !(__mips == 64)
 #else  // !(__mips_isa_rev >= 6)
 #define LW(psrc)                                        \
   ({                                                    \
     uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
     uint32_t val_m;                                     \
     asm volatile("ulw %[val_m], %[psrc_lw_m] \n"        \
                  : [val_m] "=r"(val_m)                  \
                  : [psrc_lw_m] "m"(*psrc_lw_m));        \
     val_m;                                              \
   })

 #if (__mips == 64)
 #define LD(psrc)                                        \
   ({                                                    \
     uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
     uint64_t val_m = 0;                                 \
     asm volatile("uld %[val_m], %[psrc_ld_m] \n"        \
                  : [val_m] "=r"(val_m)                  \
                  : [psrc_ld_m] "m"(*psrc_ld_m));        \
     val_m;                                              \
   })
 #else  // !(__mips == 64)
 #define LD(psrc)                                                         \
   ({                                                                     \
     uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */                  \
     uint32_t val0_m, val1_m;                                             \
     uint64_t val_m = 0;                                                  \
     val0_m = LW(psrc_ld_m);                                              \
     val1_m = LW(psrc_ld_m + 4);                                          \
     val_m = (uint64_t)(val1_m); /* NOLINT */                             \
     val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
     val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */           \
     val_m;                                                               \
   })
 #endif  // (__mips == 64)
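The two branches track the ISA: release 6 removed the unaligned-load instructions, so plain lw/ld (which r6 requires to tolerate misalignment) are used there, while pre-r6 cores need ulw/uld. Either way the macros read the same at the call site; a small usage sketch (`some_row` is a hypothetical byte pointer):

const uint8_t* p = some_row;  // may be unaligned
uint32_t lo = LW(p);          // 4 bytes at any alignment
uint64_t v = LD(p);           // 8 bytes; 32-bit builds compose two LWs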

View File

@@ -555,7 +555,7 @@ extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants);  // BT.709
 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))

 #define align_buffer_64(var, size)                                           \
   uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63));         /* NOLINT */ \
   uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
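align_buffer_64 over-allocates by 63 bytes and rounds the usable pointer up to the next 64-byte boundary, which is what the SIMD row functions want. A usage sketch (free_aligned_buffer_64 is the matching libyuv macro that frees var##_mem, and kWidth is a hypothetical constant):

align_buffer_64(row, kWidth * 4);  // declares row_mem (raw) and row (aligned)
assert(IS_ALIGNED(row, 64));       // row is safe for aligned SIMD loads
// ... use row as a kWidth * 4 byte scratch row ...
free_aligned_buffer_64(row);       // releases row_mem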
@@ -903,8 +903,12 @@ void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
 void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
 void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
 void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
-void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
-void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
+                         uint8_t* dst_y,
+                         int width);
+void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
+                         uint8_t* dst_y,
+                         int width);
 void BGRAToYRow_MSA(const uint8_t* src_bgra, uint8_t* dst_y, int width);
 void ABGRToYRow_MSA(const uint8_t* src_abgr, uint8_t* dst_y, int width);
 void RGBAToYRow_MSA(const uint8_t* src_rgba, uint8_t* dst_y, int width);
@@ -936,7 +940,9 @@ void ABGRToYRow_Any_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);
 void RGBAToYRow_Any_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
 void RGB24ToYRow_Any_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
 void RAWToYRow_Any_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);
-void RGB565ToYRow_Any_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
+void RGB565ToYRow_Any_NEON(const uint8_t* src_rgb565,
+                           uint8_t* dst_y,
+                           int width);
 void ARGB1555ToYRow_Any_NEON(const uint8_t* src_argb1555,
                              uint8_t* dst_y,
                              int width);
@@ -951,7 +957,9 @@ void ARGBToYRow_Any_MSA(const uint8_t* src_argb, uint8_t* dst_y, int width);
 void RGB24ToYRow_Any_MSA(const uint8_t* src_rgb24, uint8_t* dst_y, int width);
 void RAWToYRow_Any_MSA(const uint8_t* src_raw, uint8_t* dst_y, int width);
 void RGB565ToYRow_Any_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);
-void ARGB1555ToYRow_Any_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
+void ARGB1555ToYRow_Any_MSA(const uint8_t* src_argb1555,
+                            uint8_t* dst_y,
+                            int width);

 void ARGBToUVRow_AVX2(const uint8_t* src_argb,
                       int src_stride_argb,
@@ -1224,7 +1232,10 @@ void MirrorUVRow_MSA(const uint8_t* src_uv,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width);
-void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
+void MirrorUVRow_C(const uint8_t* src_uv,
+                   uint8_t* dst_u,
+                   uint8_t* dst_v,
+                   int width);

 void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
 void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
@@ -1236,7 +1247,10 @@ void ARGBMirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);
 void ARGBMirrorRow_Any_NEON(const uint8_t* src, uint8_t* dst, int width);
 void ARGBMirrorRow_Any_MSA(const uint8_t* src, uint8_t* dst, int width);

-void SplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
+void SplitUVRow_C(const uint8_t* src_uv,
+                  uint8_t* dst_u,
+                  uint8_t* dst_v,
+                  int width);
 void SplitUVRow_SSE2(const uint8_t* src_uv,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
@@ -1249,7 +1263,10 @@ void SplitUVRow_NEON(const uint8_t* src_uv,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width);
-void SplitUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width);
+void SplitUVRow_MSA(const uint8_t* src_uv,
+                    uint8_t* dst_u,
+                    uint8_t* dst_v,
+                    int width);
 void SplitUVRow_Any_SSE2(const uint8_t* src_uv,
                          uint8_t* dst_u,
                          uint8_t* dst_v,
@@ -1371,9 +1388,15 @@ void MultiplyRow_16_AVX2(const uint16_t* src_y,
                          uint16_t* dst_y,
                          int scale,
                          int width);
-void MultiplyRow_16_C(const uint16_t* src_y, uint16_t* dst_y, int scale, int width);
+void MultiplyRow_16_C(const uint16_t* src_y,
+                      uint16_t* dst_y,
+                      int scale,
+                      int width);
-void Convert8To16Row_C(const uint8_t* src_y, uint16_t* dst_y, int scale, int width);
+void Convert8To16Row_C(const uint8_t* src_y,
+                       uint16_t* dst_y,
+                       int scale,
+                       int width);
 void Convert8To16Row_SSE2(const uint8_t* src_y,
                           uint16_t* dst_y,
                           int scale,
@@ -1391,7 +1414,10 @@ void Convert8To16Row_Any_AVX2(const uint8_t* src_y,
                               int scale,
                               int width);
-void Convert16To8Row_C(const uint16_t* src_y, uint8_t* dst_y, int scale, int width);
+void Convert16To8Row_C(const uint16_t* src_y,
+                       uint8_t* dst_y,
+                       int scale,
+                       int width);
 void Convert16To8Row_SSSE3(const uint16_t* src_y,
                            uint8_t* dst_y,
                            int scale,
@@ -1422,8 +1448,12 @@ void CopyRow_Any_NEON(const uint8_t* src, uint8_t* dst, int count);
 void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);

 void ARGBCopyAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBCopyAlphaRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBCopyAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBCopyAlphaRow_SSE2(const uint8_t* src_argb,
+                           uint8_t* dst_argb,
+                           int width);
+void ARGBCopyAlphaRow_AVX2(const uint8_t* src_argb,
+                           uint8_t* dst_argb,
+                           int width);
 void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_argb,
                                uint8_t* dst_argb,
                                int width);
@@ -1432,10 +1462,18 @@ void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_argb,
                                int width);

 void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
-void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, uint8_t* dst_a, int width);
-void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_a, int width);
-void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width);
-void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, uint8_t* dst_a, int width);
+void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
+                              uint8_t* dst_a,
+                              int width);
+void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
+                              uint8_t* dst_a,
+                              int width);
+void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
+                              uint8_t* dst_a,
+                              int width);
+void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
+                             uint8_t* dst_a,
+                             int width);
 void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_argb,
                                   uint8_t* dst_a,
                                   int width);
@@ -1450,8 +1488,12 @@ void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_argb,
                                  int width);

 void ARGBCopyYToAlphaRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
-void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
-void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
+void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src_y,
+                              uint8_t* dst_argb,
+                              int width);
+void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src_y,
+                              uint8_t* dst_argb,
+                              int width);
 void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_y,
                                   uint8_t* dst_argb,
                                   int width);
@@ -1512,17 +1554,23 @@ void ARGBShuffleRow_Any_MSA(const uint8_t* src_argb,
                             const uint8_t* shuffler,
                             int width);

-void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
+                          uint8_t* dst_argb,
+                          int width);
 void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
 void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
-void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
+void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565,
+                          uint8_t* dst_argb,
+                          int width);
 void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555,
                             uint8_t* dst_argb,
                             int width);
 void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444,
                             uint8_t* dst_argb,
                             int width);
-void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
+void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
+                          uint8_t* dst_argb,
+                          int width);
 void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
                             uint8_t* dst_argb,
                             int width);
@@ -1530,14 +1578,20 @@ void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
                             uint8_t* dst_argb,
                             int width);

-void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
+void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
+                         uint8_t* dst_argb,
+                         int width);
 void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
 void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
 void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
 void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
 void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
-void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
-void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
+void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
+                          uint8_t* dst_argb,
+                          int width);
+void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
+                         uint8_t* dst_argb,
+                         int width);
 void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
                             uint8_t* dst_argb,
                             int width);
@@ -1560,8 +1614,12 @@ void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width);
 void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_rgb24,
                               uint8_t* dst_argb,
                               int width);
-void RAWToARGBRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
-void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToARGBRow_Any_SSSE3(const uint8_t* src_raw,
+                            uint8_t* dst_argb,
+                            int width);
+void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_raw,
+                             uint8_t* dst_rgb24,
+                             int width);

 void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_rgb565,
                               uint8_t* dst_argb,
@@ -1585,11 +1643,19 @@ void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_argb4444,
 void RGB24ToARGBRow_Any_NEON(const uint8_t* src_rgb24,
                              uint8_t* dst_argb,
                              int width);
-void RGB24ToARGBRow_Any_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
-void RAWToARGBRow_Any_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
+void RGB24ToARGBRow_Any_MSA(const uint8_t* src_rgb24,
+                            uint8_t* dst_argb,
+                            int width);
+void RAWToARGBRow_Any_NEON(const uint8_t* src_raw,
+                           uint8_t* dst_argb,
+                           int width);
 void RAWToARGBRow_Any_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
-void RAWToRGB24Row_Any_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
-void RAWToRGB24Row_Any_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
+void RAWToRGB24Row_Any_NEON(const uint8_t* src_raw,
+                            uint8_t* dst_rgb24,
+                            int width);
+void RAWToRGB24Row_Any_MSA(const uint8_t* src_raw,
+                           uint8_t* dst_rgb24,
+                           int width);
 void RGB565ToARGBRow_Any_NEON(const uint8_t* src_rgb565,
                               uint8_t* dst_argb,
                               int width);
@@ -1613,8 +1679,12 @@ void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_argb4444,
 void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRAWRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
+void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
 void ARGBToAR30Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);

 void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
@@ -1631,15 +1701,23 @@ void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb,
                                 int width);

 void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
+void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
 void ARGBToAR30Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);

 void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
+void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
 void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
                                 uint8_t* dst_rgb,
                                 const uint32_t dither4,
@@ -1647,8 +1725,12 @@ void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
 void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
 void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width);
+void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width);
 void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
                                uint8_t* dst_rgb,
                                const uint32_t dither4,
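For reference, the small-pixel destinations these rows write use the conventional packed layouts; this is an aside, not part of the diff, and assumes the usual little-endian definitions:

// Bit packing assumed by the names above, most significant bit first:
//   RGB565:   rrrrrggg gggbbbbb   (5-6-5, no alpha)
//   ARGB1555: arrrrrgg gggbbbbb   (1-5-5-5)
//   ARGB4444: aaaarrrr ggggbbbb   (4-4-4-4)
// e.g. packing one 8-bit-per-channel pixel into RGB565:
//   uint16_t p = (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));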
@@ -2283,16 +2365,24 @@ void ARGBSubtractRow_Any_MSA(const uint8_t* src_argb,
                              uint8_t* dst_argb,
                              int width);

-void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_argb,
+                              uint8_t* dst_rgb,
+                              int width);
+void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
+void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_argb,
+                              uint8_t* dst_rgb,
+                              int width);
 void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_argb,
                                 uint8_t* dst_rgb,
                                 int width);
 void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_argb,
                                 uint8_t* dst_rgb,
                                 int width);
-void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_argb,
+                             uint8_t* dst_rgb,
+                             int width);

 void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_argb,
                                     uint8_t* dst_rgb,
@@ -2303,18 +2393,28 @@ void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_argb,
                                     const uint32_t dither4,
                                     int width);

-void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_argb,
+                              uint8_t* dst_rgb,
+                              int width);
 void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_argb,
                                 uint8_t* dst_rgb,
                                 int width);
 void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_argb,
                                 uint8_t* dst_rgb,
                                 int width);
-void ARGBToAR30Row_Any_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToAR30Row_Any_AVX2(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
-void ARGBToRGB24Row_Any_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToRAWRow_Any_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToRGB565Row_Any_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_Any_NEON(const uint8_t* src_argb,
+                             uint8_t* dst_rgb,
+                             int width);
+void ARGBToRAWRow_Any_NEON(const uint8_t* src_argb,
+                           uint8_t* dst_rgb,
+                           int width);
+void ARGBToRGB565Row_Any_NEON(const uint8_t* src_argb,
+                              uint8_t* dst_rgb,
+                              int width);
 void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_argb,
                                 uint8_t* dst_rgb,
                                 int width);
@@ -2325,9 +2425,13 @@ void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_argb,
                                     uint8_t* dst_rgb,
                                     const uint32_t dither4,
                                     int width);
-void ARGBToRGB24Row_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB24Row_Any_MSA(const uint8_t* src_argb,
+                            uint8_t* dst_rgb,
+                            int width);
 void ARGBToRAWRow_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
-void ARGBToRGB565Row_Any_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
+void ARGBToRGB565Row_Any_MSA(const uint8_t* src_argb,
+                             uint8_t* dst_rgb,
+                             int width);
 void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_argb,
                                uint8_t* dst_rgb,
                                int width);
@@ -2749,10 +2853,18 @@ void I422ToUYVYRow_Any_MSA(const uint8_t* src_y,
 // Effects related row functions.
 void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
+                            uint8_t* dst_argb,
+                            int width);
+void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
+                           uint8_t* dst_argb,
+                           int width);
+void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
+                           uint8_t* dst_argb,
+                           int width);
+void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
+                          uint8_t* dst_argb,
+                          int width);
 void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_argb,
                                 uint8_t* dst_argb,
                                 int width);
@@ -2768,9 +2880,15 @@ void ARGBAttenuateRow_Any_MSA(const uint8_t* src_argb,
 // Inverse table for unattenuate, shared by C and SSE2.
 extern const uint32_t fixed_invtbl8[256];
-void ARGBUnattenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
-void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width);
+void ARGBUnattenuateRow_C(const uint8_t* src_argb,
+                          uint8_t* dst_argb,
+                          int width);
+void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
+                             uint8_t* dst_argb,
+                             int width);
+void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
+                             uint8_t* dst_argb,
+                             int width);
 void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_argb,
                                  uint8_t* dst_argb,
                                  int width);
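Attenuate premultiplies color by alpha; unattenuate undoes it via the reciprocal table fixed_invtbl8 declared above. A minimal C sketch of the idea only, not the library's exact fixed-point rounding:

static void AttenuateSketch(const uint8_t* src_argb, uint8_t* dst_argb,
                            int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t a = src_argb[x * 4 + 3];
    dst_argb[x * 4 + 0] = (uint8_t)(src_argb[x * 4 + 0] * a / 255);  // B
    dst_argb[x * 4 + 1] = (uint8_t)(src_argb[x * 4 + 1] * a / 255);  // G
    dst_argb[x * 4 + 2] = (uint8_t)(src_argb[x * 4 + 2] * a / 255);  // R
    dst_argb[x * 4 + 3] = (uint8_t)a;                                // A kept
  }
}

Unattenuate is the inverse, roughly rgb * 255 / a, with the division replaced by a lookup of the precomputed reciprocal of each possible alpha.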
@@ -2805,11 +2923,19 @@ void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
                             const int8_t* matrix_argb,
                             int width);

-void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width);
-void ARGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width);
+void ARGBColorTableRow_C(uint8_t* dst_argb,
+                         const uint8_t* table_argb,
+                         int width);
+void ARGBColorTableRow_X86(uint8_t* dst_argb,
+                           const uint8_t* table_argb,
+                           int width);
-void RGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width);
-void RGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width);
+void RGBColorTableRow_C(uint8_t* dst_argb,
+                        const uint8_t* table_argb,
+                        int width);
+void RGBColorTableRow_X86(uint8_t* dst_argb,
+                          const uint8_t* table_argb,
+                          int width);

 void ARGBQuantizeRow_C(uint8_t* dst_argb,
                        int scale,
@@ -3075,37 +3201,58 @@ void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
 // Scale and convert to half float.
 void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width);
-void HalfFloatRow_SSE2(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloatRow_SSE2(const uint16_t* src,
+                       uint16_t* dst,
+                       float scale,
+                       int width);
 void HalfFloatRow_Any_SSE2(const uint16_t* src,
                            uint16_t* dst,
                            float scale,
                            int width);
-void HalfFloatRow_AVX2(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloatRow_AVX2(const uint16_t* src,
+                       uint16_t* dst,
+                       float scale,
+                       int width);
 void HalfFloatRow_Any_AVX2(const uint16_t* src,
                            uint16_t* dst,
                            float scale,
                            int width);
-void HalfFloatRow_F16C(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloatRow_F16C(const uint16_t* src,
+                       uint16_t* dst,
+                       float scale,
+                       int width);
 void HalfFloatRow_Any_F16C(const uint16_t* src,
                            uint16_t* dst,
                            float scale,
                            int width);
-void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloat1Row_F16C(const uint16_t* src,
+                        uint16_t* dst,
+                        float scale,
+                        int width);
 void HalfFloat1Row_Any_F16C(const uint16_t* src,
                             uint16_t* dst,
                             float scale,
                             int width);
-void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloatRow_NEON(const uint16_t* src,
+                       uint16_t* dst,
+                       float scale,
+                       int width);
 void HalfFloatRow_Any_NEON(const uint16_t* src,
                            uint16_t* dst,
                            float scale,
                            int width);
-void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloat1Row_NEON(const uint16_t* src,
+                        uint16_t* dst,
+                        float scale,
+                        int width);
 void HalfFloat1Row_Any_NEON(const uint16_t* src,
                             uint16_t* dst,
                             float scale,
                             int width);
-void HalfFloatRow_MSA(const uint16_t* src, uint16_t* dst, float scale, int width);
+void HalfFloatRow_MSA(const uint16_t* src,
+                      uint16_t* dst,
+                      float scale,
+                      int width);
 void HalfFloatRow_Any_MSA(const uint16_t* src,
                           uint16_t* dst,
                           float scale,

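HalfFloatRow scales each 16-bit sample by `scale` and stores the IEEE binary16 encoding, so for example 10-bit video maps into 0.0..1.0 with scale = 1.0f / 1023.0f. A hedged scalar sketch of the contract (the real rows do the float-to-half conversion in SIMD; ToHalf below is a hypothetical helper, not a libyuv API):

static uint16_t ToHalf(float f);  // assumed: float -> binary16 bit pattern

static void HalfFloatRowSketch(const uint16_t* src, uint16_t* dst,
                               float scale, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = ToHalf((float)src[x] * scale);  // e.g. 1023 * (1/1023) -> 1.0
  }
}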
View File

@@ -302,7 +302,9 @@ void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr,
                                uint16_t* dst_ptr,
                                int dst_width);
 void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
-void ScaleAddRow_16_C(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width);
+void ScaleAddRow_16_C(const uint16_t* src_ptr,
+                      uint32_t* dst_ptr,
+                      int src_width);
 void ScaleARGBRowDown2_C(const uint8_t* src_argb,
                          ptrdiff_t src_stride,
                          uint8_t* dst_argb,
@@ -493,8 +495,12 @@ void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr,
 void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
 void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
-void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
-void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr,
+                          uint16_t* dst_ptr,
+                          int src_width);
+void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr,
+                          uint16_t* dst_ptr,
+                          int src_width);

 void ScaleFilterCols_SSSE3(uint8_t* dst_ptr,
                            const uint8_t* src_ptr,
@@ -810,7 +816,9 @@ void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr,
                                    int dst_width);
 void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
-void ScaleAddRow_Any_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
+void ScaleAddRow_Any_NEON(const uint8_t* src_ptr,
+                          uint16_t* dst_ptr,
+                          int src_width);
 void ScaleFilterCols_NEON(uint8_t* dst_ptr,
                           const uint8_t* src_ptr,
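ScaleAddRow accumulates one 8-bit source row into a row of 16-bit column sums, which is how the box filter builds averages before dividing; the _16 variant widens to 32 bits for the same reason. A sketch of the plausible C behavior behind the declaration:

void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int x;
  for (x = 0; x < src_width; ++x) {
    dst_ptr[x] += src_ptr[x];  // column-wise accumulate for box filtering
  }
}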

View File

@@ -28,11 +28,11 @@ extern "C" {
 // Needs to be a macro otherwise the OS X compiler complains when the kFormat*
 // constants are used in a switch.
 #ifdef __cplusplus
 #define FOURCC(a, b, c, d)                                        \
   ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
    (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24))
 #else
 #define FOURCC(a, b, c, d)                                     \
   (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */       \
    ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
 #endif
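The macro packs four ASCII bytes little-endian, first character in the low byte. A worked example:

// FOURCC('I', '4', '2', '0')
//   = 0x49 | (0x34 << 8) | (0x32 << 16) | (0x30 << 24)
//   = 0x30323449
// so a uint32_t read of the bytes "I420" on a little-endian machine
// compares equal to the constant.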

View File

@@ -32,7 +32,8 @@ LIBYUV_API
 uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
   const int kBlockSize = 1 << 15;  // 32768;
   int remainder;
-  uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) = HashDjb2_C;
+  uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) =
+      HashDjb2_C;
 #if defined(HAS_HASHDJB2_SSE41)
   if (TestCpuFlag(kCpuHasSSE41)) {
     HashDjb2_SSE = HashDjb2_SSE41;
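The hash being dispatched is the classic djb2 recurrence, hash = hash * 33 + byte, seeded so blocks can be chained; the SSE4.1 path in compare_gcc.cc reproduces 16 steps at once using precomputed powers of 33 (kHash16x33, kHashMul0..3). The scalar reference, as a sketch of what HashDjb2_C computes:

uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) {
  uint32_t hash = seed;
  int i;
  for (i = 0; i < count; ++i) {
    hash = ((hash << 5) + hash) + src[i];  // hash * 33 + byte
  }
  return hash;
}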
@@ -93,7 +94,10 @@ static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
 // Scan an opaque argb image and return fourcc based on alpha offset.
 // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
 LIBYUV_API
-uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height) {
+uint32_t ARGBDetect(const uint8_t* argb,
+                    int stride_argb,
+                    int width,
+                    int height) {
   uint32_t fourcc = 0;
   int h;
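The detection idea, as a sketch under the stated all-opaque assumption: the alpha byte of a little-endian ARGB word is byte 3 and of a BGRA word byte 0, so whichever offset keeps reading 255 names the layout; an image whose examined bytes are all 255 stays ambiguous and yields 0. The channel-to-offset mapping below is my reading, not a quote of ARGBDetectRow_C:

// Sketch only; the library does this per row via ARGBDetectRow_C.
static uint32_t DetectSketch(const uint8_t* argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    if (argb[x * 4 + 0] != 255) {  // alpha can't be byte 0 -> not BGRA
      return FOURCC('A', 'R', 'G', 'B');
    }
    if (argb[x * 4 + 3] != 255) {  // alpha can't be byte 3 -> not ARGB
      return FOURCC('B', 'G', 'R', 'A');
    }
  }
  return 0;
}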
@@ -115,16 +119,16 @@ uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height)
 LIBYUV_API
 uint64_t ComputeHammingDistance(const uint8_t* src_a,
                                 const uint8_t* src_b,
                                 int count) {
   const int kBlockSize = 1 << 15;  // 32768;
   const int kSimdSize = 64;
   // SIMD for multiple of 64, and C for remainder
   int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1);
   uint64_t diff = 0;
   int i;
-  uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b, int count) =
-      HammingDistance_C;
+  uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b,
+                              int count) = HammingDistance_C;
 #if defined(HAS_HAMMINGDISTANCE_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     HammingDistance = HammingDistance_NEON;
@@ -173,8 +177,8 @@ uint64_t ComputeHammingDistance(const uint8_t* src_a,
 // TODO(fbarchard): Refactor into row function.
 LIBYUV_API
 uint64_t ComputeSumSquareError(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count) {
   // SumSquareError returns values 0 to 65535 for each squared difference.
   // Up to 65536 of those can be summed and remain within a uint32_t.
   // After each block of 65536 pixels, accumulate into a uint64_t.
@@ -182,8 +186,8 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a,
   int remainder = count & (kBlockSize - 1) & ~31;
   uint64_t sse = 0;
   int i;
-  uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, int count) =
-      SumSquareError_C;
+  uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
+                             int count) = SumSquareError_C;
 #if defined(HAS_SUMSQUAREERROR_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     SumSquareError = SumSquareError_NEON;
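The overflow comment above is the whole design: a uint32_t row function is only safe for a bounded number of pixels, so the driver walks the buffer in kBlockSize chunks, widens each partial sum into a uint64_t, and finishes the SIMD-unfriendly tail in C. Schematically (a sketch of the loop shape used by both Compute* functions, not a verbatim excerpt):

uint64_t total = 0;
int i;
for (i = 0; i + kBlockSize <= count; i += kBlockSize) {
  total += SumSquareError(src_a + i, src_b + i, kBlockSize);   // SIMD blocks
}
if (remainder) {
  total += SumSquareError_C(src_a + i, src_b + i, remainder);  // C tail
}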
@@ -228,11 +232,11 @@ uint64_t ComputeSumSquareError(const uint8_t* src_a,
 LIBYUV_API
 uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
                                     int stride_a,
                                     const uint8_t* src_b,
                                     int stride_b,
                                     int width,
                                     int height) {
   uint64_t sse = 0;
   int h;
   // Coalesce rows.
@@ -274,7 +278,7 @@ double CalcFramePsnr(const uint8_t* src_a,
                      int height) {
   const uint64_t samples = (uint64_t)width * (uint64_t)height;
   const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
                                                   stride_b, width, height);
   return SumSquareErrorToPsnr(sse, samples);
 }
@@ -293,8 +297,8 @@ double I420Psnr(const uint8_t* src_y_a,
                 int stride_v_b,
                 int width,
                 int height) {
-  const uint64_t sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, src_y_b,
-                                                    stride_y_b, width, height);
+  const uint64_t sse_y = ComputeSumSquareErrorPlane(
+      src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
   const int width_uv = (width + 1) >> 1;
   const int height_uv = (height + 1) >> 1;
   const uint64_t sse_u = ComputeSumSquareErrorPlane(
@@ -302,7 +306,7 @@ double I420Psnr(const uint8_t* src_y_a,
   const uint64_t sse_v = ComputeSumSquareErrorPlane(
       src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv);
   const uint64_t samples = (uint64_t)width * (uint64_t)height +
                            2 * ((uint64_t)width_uv * (uint64_t)height_uv);
   const uint64_t sse = sse_y + sse_u + sse_v;
   return SumSquareErrorToPsnr(sse, samples);
 }
@@ -344,7 +348,7 @@ static double Ssim8x8_C(const uint8_t* src_a,
   const int64_t sum_a_x_sum_b = sum_a * sum_b;
   const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) *
                          (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
   const int64_t sum_a_sq = sum_a * sum_a;
   const int64_t sum_b_sq = sum_b * sum_b;
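For reference, Ssim8x8_C is evaluating the standard SSIM ratio over an 8x8 window. Written with means and (co)variances, the quantity whose numerator is built above is, up to the count-squared scaling folded into the constants:

SSIM(a, b) = ((2 * mu_a * mu_b + c1) * (2 * cov_ab + c2)) /
             ((mu_a^2 + mu_b^2 + c1) * (var_a + var_b + c2))

ssim_n corresponds to that numerator expressed in raw sums (sum_a, sum_b, sum_axb) rather than means.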

View File

@@ -18,7 +18,9 @@ extern "C" {
 #endif

 #if ORIGINAL_OPT
-uint32_t HammingDistance_C1(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t HammingDistance_C1(const uint8_t* src_a,
+                            const uint8_t* src_b,
+                            int count) {
   uint32_t diff = 0u;
   int i;
@@ -46,12 +48,14 @@ uint32_t HammingDistance_C1(const uint8_t* src_a, const uint8_t* src_b, int coun
 #endif

 // Hakmem method for hamming distance.
-uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t HammingDistance_C(const uint8_t* src_a,
+                           const uint8_t* src_b,
+                           int count) {
   uint32_t diff = 0u;
   int i;

   for (i = 0; i < count - 3; i += 4) {
-    uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b);
+    uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b);  // NOLINT
     uint32_t u = x - ((x >> 1) & 0x55555555);
     u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
     diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
@@ -71,7 +75,9 @@ uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count
   return diff;
 }

-uint32_t SumSquareError_C(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t SumSquareError_C(const uint8_t* src_a,
+                          const uint8_t* src_b,
+                          int count) {
   uint32_t sse = 0u;
   int i;
   for (i = 0; i < count; ++i) {
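The Hakmem loop body above is the classic parallel popcount: fold bit pairs, then nibbles, then let one multiply sum the four byte counts into the top byte. Traced for x = 0x000000FF (eight set bits):

// u = x - ((x >> 1) & 0x55555555)                 -> 0x000000AA (2 per pair)
// u = ((u >> 2) & 0x33333333) + (u & 0x33333333)  -> 0x00000044 (4 per nibble)
// ((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101      -> 0x08080808
// ... >> 24                                       -> 8, the popcount of x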

View File

@@ -24,8 +24,8 @@ extern "C" {
 #if defined(__x86_64__)
 uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count) {
   uint64_t diff = 0u;

   asm volatile(
@@ -72,8 +72,8 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
 }
 #else
 uint32_t HammingDistance_SSE42(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count) {
   uint32_t diff = 0u;

   asm volatile(
@@ -116,8 +116,8 @@ static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
 static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

 uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count) {
   uint32_t diff = 0u;

   asm volatile(
@@ -174,7 +174,9 @@ uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
 }

 #ifdef HAS_HAMMINGDISTANCE_AVX2
-uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t HammingDistance_AVX2(const uint8_t* src_a,
+                              const uint8_t* src_b,
+                              int count) {
   uint32_t diff = 0u;

   asm volatile(
@@ -227,43 +229,46 @@ uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int co
 }
 #endif  // HAS_HAMMINGDISTANCE_AVX2

-uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t SumSquareError_SSE2(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count) {
   uint32_t sse;
   asm volatile(
       "pxor      %%xmm0,%%xmm0                   \n"
       "pxor      %%xmm5,%%xmm5                   \n"

       LABELALIGN
       "1:                                        \n"
       "movdqu    (%0),%%xmm1                     \n"
       "lea       0x10(%0),%0                     \n"
       "movdqu    (%1),%%xmm2                     \n"
       "lea       0x10(%1),%1                     \n"
       "movdqa    %%xmm1,%%xmm3                   \n"
       "psubusb   %%xmm2,%%xmm1                   \n"
       "psubusb   %%xmm3,%%xmm2                   \n"
       "por       %%xmm2,%%xmm1                   \n"
       "movdqa    %%xmm1,%%xmm2                   \n"
       "punpcklbw %%xmm5,%%xmm1                   \n"
       "punpckhbw %%xmm5,%%xmm2                   \n"
       "pmaddwd   %%xmm1,%%xmm1                   \n"
       "pmaddwd   %%xmm2,%%xmm2                   \n"
       "paddd     %%xmm1,%%xmm0                   \n"
       "paddd     %%xmm2,%%xmm0                   \n"
       "sub       $0x10,%2                        \n"
       "jg        1b                              \n"

       "pshufd    $0xee,%%xmm0,%%xmm1             \n"
       "paddd     %%xmm1,%%xmm0                   \n"
       "pshufd    $0x1,%%xmm0,%%xmm1              \n"
       "paddd     %%xmm1,%%xmm0                   \n"
       "movd      %%xmm0,%3                       \n"

       : "+r"(src_a),  // %0
         "+r"(src_b),  // %1
         "+r"(count),  // %2
         "=g"(sse)     // %3
       ::"memory",
         "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
   return sse;
 }
@@ -295,56 +300,56 @@ static const uvec32 kHashMul3 = {
 uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
   uint32_t hash;
   asm volatile(
       "movd      %2,%%xmm0                       \n"
       "pxor      %%xmm7,%%xmm7                   \n"
       "movdqa    %4,%%xmm6                       \n"

       LABELALIGN
       "1:                                        \n"
       "movdqu    (%0),%%xmm1                     \n"
       "lea       0x10(%0),%0                     \n"
       "pmulld    %%xmm6,%%xmm0                   \n"
       "movdqa    %5,%%xmm5                       \n"
       "movdqa    %%xmm1,%%xmm2                   \n"
       "punpcklbw %%xmm7,%%xmm2                   \n"
       "movdqa    %%xmm2,%%xmm3                   \n"
       "punpcklwd %%xmm7,%%xmm3                   \n"
       "pmulld    %%xmm5,%%xmm3                   \n"
       "movdqa    %6,%%xmm5                       \n"
       "movdqa    %%xmm2,%%xmm4                   \n"
       "punpckhwd %%xmm7,%%xmm4                   \n"
       "pmulld    %%xmm5,%%xmm4                   \n"
       "movdqa    %7,%%xmm5                       \n"
       "punpckhbw %%xmm7,%%xmm1                   \n"
       "movdqa    %%xmm1,%%xmm2                   \n"
       "punpcklwd %%xmm7,%%xmm2                   \n"
       "pmulld    %%xmm5,%%xmm2                   \n"
       "movdqa    %8,%%xmm5                       \n"
       "punpckhwd %%xmm7,%%xmm1                   \n"
       "pmulld    %%xmm5,%%xmm1                   \n"
       "paddd     %%xmm4,%%xmm3                   \n"
       "paddd     %%xmm2,%%xmm1                   \n"
       "paddd     %%xmm3,%%xmm1                   \n"
       "pshufd    $0xe,%%xmm1,%%xmm2              \n"
       "paddd     %%xmm2,%%xmm1                   \n"
       "pshufd    $0x1,%%xmm1,%%xmm2              \n"
       "paddd     %%xmm2,%%xmm1                   \n"
       "paddd     %%xmm1,%%xmm0                   \n"
       "sub       $0x10,%1                        \n"
       "jg        1b                              \n"
       "movd      %%xmm0,%3                       \n"
       : "+r"(src),        // %0
         "+r"(count),      // %1
         "+rm"(seed),      // %2
         "=g"(hash)        // %3
       : "m"(kHash16x33),  // %4
         "m"(kHashMul0),   // %5
         "m"(kHashMul1),   // %6
         "m"(kHashMul2),   // %7
         "m"(kHashMul3)    // %8
       : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
         "xmm7");
   return hash;
 }
 #endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))

View File

@@ -22,7 +22,9 @@ namespace libyuv {
 extern "C" {
 #endif

-uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t HammingDistance_MSA(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count) {
   uint32_t diff = 0u;
   int i;
   v16u8 src0, src1, src2, src3;
@@ -47,7 +49,9 @@ uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int cou
   return diff;
 }

-uint32_t SumSquareError_MSA(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t SumSquareError_MSA(const uint8_t* src_a,
+                            const uint8_t* src_b,
+                            int count) {
   uint32_t sse = 0u;
   int i;
   v16u8 src0, src1, src2, src3;

View File

@@ -23,7 +23,9 @@ extern "C" {
 // 256 bits at a time
 // uses short accumulator which restricts count to 131 KB
-uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+                              const uint8_t* src_b,
+                              int count) {
   uint32_t diff;

   asm volatile(
@@ -52,7 +54,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int co
   return diff;
 }

-uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count) {
   uint32_t sse;
   asm volatile(
       "vmov.u8   q8, #0                          \n"

View File

@@ -22,7 +22,9 @@ extern "C" {
 // 256 bits at a time
 // uses short accumulator which restricts count to 131 KB
-uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t HammingDistance_NEON(const uint8_t* src_a,
+                              const uint8_t* src_b,
+                              int count) {
   uint32_t diff;
   asm volatile(
       "movi      v4.8h, #0                       \n"
@@ -47,7 +49,9 @@ uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int co
   return diff;
 }

-uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) {
+uint32_t SumSquareError_NEON(const uint8_t* src_a,
+                             const uint8_t* src_b,
+                             int count) {
   uint32_t sse;
   asm volatile(
       "eor       v16.16b, v16.16b, v16.16b       \n"

View File

@ -26,13 +26,13 @@ extern "C" {
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
uint32_t HammingDistance_SSE42(const uint8_t* src_a, uint32_t HammingDistance_SSE42(const uint8_t* src_a,
const uint8_t* src_b, const uint8_t* src_b,
int count) { int count) {
uint32_t diff = 0u; uint32_t diff = 0u;
int i; int i;
for (i = 0; i < count - 3; i += 4) { for (i = 0; i < count - 3; i += 4) {
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
src_a += 4; src_a += 4;
src_b += 4; src_b += 4;
diff += __popcnt(x); diff += __popcnt(x);
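The MSVC loop above consumes 4 bytes per iteration with __popcnt; the same computation byte-at-a-time, sketched with a GCC/Clang-style builtin (helper name illustrative):

static uint32_t HammingDistanceScalar(const uint8_t* src_a,
                                      const uint8_t* src_b,
                                      int count) {
  uint32_t diff = 0u;
  int i;
  for (i = 0; i < count; ++i) {
    diff += (uint32_t)__builtin_popcount(src_a[i] ^ src_b[i]);  // differing bits
  }
  return diff;
}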
View File
@ -451,8 +451,9 @@ int YUY2ToI420(const uint8_t* src_yuy2,
int width, int width,
int height) { int height) {
int y; int y;
void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2,
uint8_t* dst_v, int width) = YUY2ToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
YUY2ToUVRow_C;
void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
YUY2ToYRow_C; YUY2ToYRow_C;
// Negative height means invert the image. // Negative height means invert the image.
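The reflowed function-pointer declarations above are libyuv's standard dispatch idiom: start at the portable C row function and promote the pointer when the CPU allows. A sketch of the usual promotion ladder (the NEON names and the width-16 alignment illustrate the pattern; they are not quoted from this hunk):

void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
    YUY2ToYRow_C;                      // portable default
#if defined(HAS_YUY2TOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
  YUY2ToYRow = YUY2ToYRow_Any_NEON;    // any width: SIMD body + scalar tail
  if (IS_ALIGNED(width, 16)) {
    YUY2ToYRow = YUY2ToYRow_NEON;      // fast path when width % 16 == 0
  }
}
#endif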
@ -531,8 +532,9 @@ int UYVYToI420(const uint8_t* src_uyvy,
int width, int width,
int height) { int height) {
int y; int y;
void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy,
uint8_t* dst_v, int width) = UYVYToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
UYVYToUVRow_C;
void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
UYVYToYRow_C; UYVYToYRow_C;
// Negative height means invert the image. // Negative height means invert the image.
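"Negative height means invert the image" refers to this idiom, sketched here for the UYVY source (pointer and stride names follow the signature above):

if (height < 0) {
  height = -height;
  src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;  // jump to last row
  src_stride_uyvy = -src_stride_uyvy;                    // then walk upward
}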
@ -611,8 +613,9 @@ int ARGBToI420(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -706,8 +709,9 @@ int BGRAToI420(const uint8_t* src_bgra,
int width, int width,
int height) { int height) {
int y; int y;
void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, uint8_t* dst_u, void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra,
uint8_t* dst_v, int width) = BGRAToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
BGRAToUVRow_C;
void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) = void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) =
BGRAToYRow_C; BGRAToYRow_C;
if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -791,8 +795,9 @@ int ABGRToI420(const uint8_t* src_abgr,
int width, int width,
int height) { int height) {
int y; int y;
void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, uint8_t* dst_u, void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_v, int width) = ABGRToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ABGRToUVRow_C;
void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) =
ABGRToYRow_C; ABGRToYRow_C;
if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -876,8 +881,9 @@ int RGBAToI420(const uint8_t* src_rgba,
int width, int width,
int height) { int height) {
int y; int y;
void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, uint8_t* dst_u, void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba,
uint8_t* dst_v, int width) = RGBAToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
RGBAToUVRow_C;
void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) = void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) =
RGBAToYRow_C; RGBAToYRow_C;
if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -963,14 +969,16 @@ int RGB24ToI420(const uint8_t* src_rgb24,
int y; int y;
#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA))
void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24,
uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
RGB24ToUVRow_C;
void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) =
RGB24ToYRow_C; RGB24ToYRow_C;
#else #else
void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RGB24ToARGBRow_C; RGB24ToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
#endif #endif
@ -1099,8 +1107,9 @@ int RAWToI420(const uint8_t* src_raw,
#else #else
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C; RAWToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
#endif #endif
@ -1228,10 +1237,11 @@ int RGB565ToI420(const uint8_t* src_rgb565,
void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) = void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) =
RGB565ToYRow_C; RGB565ToYRow_C;
#else #else
void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
RGB565ToARGBRow_C; int width) = RGB565ToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
#endif #endif
@ -1362,13 +1372,14 @@ int ARGB1555ToI420(const uint8_t* src_argb1555,
void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB1555ToUVRow_C; ARGB1555ToUVRow_C;
void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y, int width) = void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y,
ARGB1555ToYRow_C; int width) = ARGB1555ToYRow_C;
#else #else
void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
ARGB1555ToARGBRow_C; int width) = ARGB1555ToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
#endif #endif
@ -1503,13 +1514,14 @@ int ARGB4444ToI420(const uint8_t* src_argb4444,
void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444,
uint8_t* dst_u, uint8_t* dst_v, int width) = uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGB4444ToUVRow_C; ARGB4444ToUVRow_C;
void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y, int width) = void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y,
ARGB4444ToYRow_C; int width) = ARGB4444ToYRow_C;
#else #else
void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb,
ARGB4444ToARGBRow_C; int width) = ARGB4444ToARGBRow_C;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
#endif #endif
View File
@ -776,8 +776,8 @@ static int I420AlphaToARGBMatrix(const uint8_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) = I422AlphaToARGBRow_C; int width) = I422AlphaToARGBRow_C;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
ARGBAttenuateRow_C; int width) = ARGBAttenuateRow_C;
if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1244,8 +1244,8 @@ int RGB565ToARGB(const uint8_t* src_rgb565,
int width, int width,
int height) { int height) {
int y; int y;
void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) = void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb,
RGB565ToARGBRow_C; int width) = RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1481,9 +1481,9 @@ static int NV12ToARGBMatrix(const uint8_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*NV12ToARGBRow)(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, void (*NV12ToARGBRow)(
const struct YuvConstants* yuvconstants, int width) = const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
NV12ToARGBRow_C; const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1548,9 +1548,9 @@ static int NV21ToARGBMatrix(const uint8_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*NV21ToARGBRow)(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, void (*NV21ToARGBRow)(
const struct YuvConstants* yuvconstants, int width) = const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
NV21ToARGBRow_C; const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1670,9 +1670,9 @@ int M420ToARGB(const uint8_t* src_m420,
int width, int width,
int height) { int height) {
int y; int y;
void (*NV12ToARGBRow)(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, void (*NV12ToARGBRow)(
const struct YuvConstants* yuvconstants, int width) = const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf,
NV12ToARGBRow_C; const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb || width <= 0 || height == 0) { if (!src_m420 || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
View File
@ -1123,7 +1123,7 @@ int I420ToRGB565Dither(const uint8_t* src_y,
I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565, ARGBToRGB565DitherRow(row_argb, dst_rgb565,
*(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT
width); // NOLINT width); // NOLINT
dst_rgb565 += dst_stride_rgb565; dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y; src_y += src_stride_y;
if (y & 1) { if (y & 1) {
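The (y & 3) << 2 load above selects one 4-byte row of a 16-byte 4x4 ordered-dither table per scanline. A sketch with illustrative values (not the table shipped in libyuv):

static const uint8_t kDither4x4[16] = {
    0, 4, 1, 5,  // offsets applied when y % 4 == 0
    6, 2, 7, 3,
    1, 5, 0, 4,
    7, 3, 6, 2,
};
// Four per-pixel dither offsets for this scanline, packed into one word.
uint32_t dither_row = *(const uint32_t*)(kDither4x4 + ((y & 3) << 2));  // NOLINT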
View File
@ -35,8 +35,8 @@ int ARGBToI444(const uint8_t* src_argb,
int y; int y;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u,
int width) = ARGBToUV444Row_C; uint8_t* dst_v, int width) = ARGBToUV444Row_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -133,8 +133,9 @@ int ARGBToI422(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -229,12 +230,13 @@ int ARGBToNV12(const uint8_t* src_argb,
int height) { int height) {
int y; int y;
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
int width) = MergeUVRow_C; uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -364,12 +366,13 @@ int ARGBToNV21(const uint8_t* src_argb,
int height) { int height) {
int y; int y;
int halfwidth = (width + 1) >> 1; int halfwidth = (width + 1) >> 1;
void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
int width) = MergeUVRow_C; uint8_t* dst_uv, int width) = MergeUVRow_C;
if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) { if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -496,8 +499,9 @@ int ARGBToYUY2(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
@ -624,8 +628,9 @@ int ARGBToUYVY(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb,
uint8_t* dst_v, int width) = ARGBToUVRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYRow_C; ARGBToYRow_C;
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
@ -1005,7 +1010,7 @@ int ARGBToRGB565Dither(const uint8_t* src_argb,
for (y = 0; y < height; ++y) { for (y = 0; y < height; ++y) {
ARGBToRGB565DitherRow(src_argb, dst_rgb565, ARGBToRGB565DitherRow(src_argb, dst_rgb565,
*(uint32_t*)(dither4x4 + ((y & 3) << 2)), *(uint32_t*)(dither4x4 + ((y & 3) << 2)), // NOLINT
width); /* NOLINT */ width); /* NOLINT */
src_argb += src_stride_argb; src_argb += src_stride_argb;
dst_rgb565 += dst_stride_rgb565; dst_rgb565 += dst_stride_rgb565;
@ -1023,8 +1028,8 @@ int ARGBToRGB565(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
ARGBToRGB565Row_C; int width) = ARGBToRGB565Row_C;
if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1089,8 +1094,8 @@ int ARGBToARGB1555(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
ARGBToARGB1555Row_C; int width) = ARGBToARGB1555Row_C;
if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1155,8 +1160,8 @@ int ARGBToARGB4444(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb,
ARGBToARGB4444Row_C; int width) = ARGBToARGB4444Row_C;
if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1275,7 +1280,8 @@ int ARGBToJ420(const uint8_t* src_argb,
int height) { int height) {
int y; int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C; ARGBToYJRow_C;
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -1368,7 +1374,8 @@ int ARGBToJ422(const uint8_t* src_argb,
int height) { int height) {
int y; int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C; uint8_t* dst_u, uint8_t* dst_v, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C; ARGBToYJRow_C;
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
View File
@ -89,7 +89,10 @@ static void JpegI400ToI420(void* opaque,
// Query size of MJPG in pixels. // Query size of MJPG in pixels.
LIBYUV_API LIBYUV_API
int MJPGSize(const uint8_t* sample, size_t sample_size, int* width, int* height) { int MJPGSize(const uint8_t* sample,
size_t sample_size,
int* width,
int* height) {
MJpegDecoder mjpeg_decoder; MJpegDecoder mjpeg_decoder;
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
if (ret) { if (ret) {
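Typical use of the reflowed MJPGSize signature, assuming the usual libyuv convention that 0 means success:

int width = 0;
int height = 0;
if (MJPGSize(sample, sample_size, &width, &height) == 0) {
  // Dimensions known; size the destination planes before MJPGToI420.
}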
View File
@ -430,8 +430,8 @@ void MergeUVPlane(const uint8_t* src_u,
int width, int width,
int height) { int height) {
int y; int y;
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
int width) = MergeUVRow_C; uint8_t* dst_uv, int width) = MergeUVRow_C;
// Coalesce rows. // Coalesce rows.
// Negative height means invert the image. // Negative height means invert the image.
if (height < 0) { if (height < 0) {
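MergeUVPlane interleaves planar U and V into the biplanar UV layout NV12 expects; its row contract as a scalar sketch (helper name illustrative):

static void MergeUVRowScalar(const uint8_t* src_u, const uint8_t* src_v,
                             uint8_t* dst_uv, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[0] = src_u[x];  // U first in NV12 byte order
    dst_uv[1] = src_v[x];
    dst_uv += 2;
  }
}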
@ -673,8 +673,8 @@ int YUY2ToI422(const uint8_t* src_yuy2,
int width, int width,
int height) { int height) {
int y; int y;
void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
int width) = YUY2ToUV422Row_C; uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
YUY2ToYRow_C; YUY2ToYRow_C;
if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -759,8 +759,8 @@ int UYVYToI422(const uint8_t* src_uyvy,
int width, int width,
int height) { int height) {
int y; int y;
void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
int width) = UYVYToUV422Row_C; uint8_t* dst_v, int width) = UYVYToUV422Row_C;
void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
UYVYToYRow_C; UYVYToYRow_C;
if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
@ -1287,8 +1287,8 @@ int ARGBMultiply(const uint8_t* src_argb0,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
int width) = ARGBMultiplyRow_C; uint8_t* dst, int width) = ARGBMultiplyRow_C;
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1436,8 +1436,8 @@ int ARGBSubtract(const uint8_t* src_argb0,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
int width) = ARGBSubtractRow_C; uint8_t* dst, int width) = ARGBSubtractRow_C;
if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -1822,7 +1822,8 @@ int ARGBRect(uint8_t* dst_argb,
int height, int height,
uint32_t value) { uint32_t value) {
int y; int y;
void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) = ARGBSetRow_C; void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
ARGBSetRow_C;
if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
return -1; return -1;
} }
@ -1890,8 +1891,8 @@ int ARGBAttenuate(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
ARGBAttenuateRow_C; int width) = ARGBAttenuateRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) { if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -2399,9 +2400,9 @@ int ARGBBlur(const uint8_t* src_argb,
void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum, void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
const int32_t* previous_cumsum, int width) = const int32_t* previous_cumsum, int width) =
ComputeCumulativeSumRow_C; ComputeCumulativeSumRow_C;
void (*CumulativeSumToAverageRow)(const int32_t* topleft, const int32_t* botleft, void (*CumulativeSumToAverageRow)(
int width, int area, uint8_t* dst, const int32_t* topleft, const int32_t* botleft, int width, int area,
int count) = CumulativeSumToAverageRow_C; uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
int32_t* cumsum_bot_row; int32_t* cumsum_bot_row;
int32_t* max_cumsum_bot_row; int32_t* max_cumsum_bot_row;
int32_t* cumsum_top_row; int32_t* cumsum_top_row;
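The cumulative-sum rows above form a summed-area table, which is what lets ARGBBlur average any box in constant time per pixel regardless of radius. The underlying identity, sketched with an assumed row-major layout:

// With S[y][x] = sum of all samples with row < y and col < x, the box
// covering rows y0..y1-1 and cols x0..x1-1 sums to four lookups.
static int32_t BoxSum(const int32_t* S, int stride,
                      int x0, int y0, int x1, int y1) {
  return S[y1 * stride + x1] - S[y0 * stride + x1] -
         S[y1 * stride + x0] + S[y0 * stride + x0];
}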
@ -2752,8 +2753,8 @@ static int ARGBSobelize(const uint8_t* src_argb,
int y; int y;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) = void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
ARGBToYJRow_C; ARGBToYJRow_C;
void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
int width) = SobelYRow_C; uint8_t* dst_sobely, int width) = SobelYRow_C;
void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1, void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
const uint8_t* src_y2, uint8_t* dst_sobely, int width) = const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
SobelXRow_C; SobelXRow_C;
@ -3052,8 +3053,8 @@ int HalfFloatPlane(const uint16_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale, int width) = void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
HalfFloatRow_C; int width) = HalfFloatRow_C;
if (!src_y || !dst_y || width <= 0 || height == 0) { if (!src_y || !dst_y || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -3133,8 +3134,8 @@ int ARGBLumaColorTable(const uint8_t* src_argb,
int height) { int height) {
int y; int y;
void (*ARGBLumaColorTableRow)( void (*ARGBLumaColorTableRow)(
const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, const uint8_t* src_argb, uint8_t* dst_argb, int width,
const uint32_t lumacoeff) = ARGBLumaColorTableRow_C; const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -3173,8 +3174,8 @@ int ARGBCopyAlpha(const uint8_t* src_argb,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
ARGBCopyAlphaRow_C; int width) = ARGBCopyAlphaRow_C;
if (!src_argb || !dst_argb || width <= 0 || height == 0) { if (!src_argb || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
@ -3238,8 +3239,8 @@ int ARGBExtractAlpha(const uint8_t* src_argb,
height = 1; height = 1;
src_stride = dst_stride = 0; src_stride = dst_stride = 0;
} }
void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, int width) = void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
ARGBExtractAlphaRow_C; int width) = ARGBExtractAlphaRow_C;
#if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) { if (TestCpuFlag(kCpuHasSSE2)) {
ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
@ -3282,8 +3283,8 @@ int ARGBCopyYToAlpha(const uint8_t* src_y,
int width, int width,
int height) { int height) {
int y; int y;
void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
ARGBCopyYToAlphaRow_C; int width) = ARGBCopyYToAlphaRow_C;
if (!src_y || !dst_argb || width <= 0 || height == 0) { if (!src_y || !dst_argb || width <= 0 || height == 0) {
return -1; return -1;
} }
View File
@ -314,8 +314,8 @@ void RotateUV180(const uint8_t* src,
int width, int width,
int height) { int height) {
int i; int i;
void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, int width) = void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
MirrorUVRow_C; int width) = MirrorUVRow_C;
#if defined(HAS_MIRRORUVROW_NEON) #if defined(HAS_MIRRORUVROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
MirrorUVRow = MirrorUVRow_NEON; MirrorUVRow = MirrorUVRow_NEON;
View File
@ -19,8 +19,8 @@ extern "C" {
#endif #endif
#define TANY(NAMEANY, TPOS_SIMD, MASK) \ #define TANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, \ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
int width) { \ int dst_stride, int width) { \
int r = width & MASK; \ int r = width & MASK; \
int n = width - r; \ int n = width - r; \
if (n > 0) { \ if (n > 0) { \
@ -44,8 +44,9 @@ TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
#undef TANY #undef TANY
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \ #define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { \ int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
int width) { \
int r = width & MASK; \ int r = width & MASK; \
int n = width - r; \ int n = width - r; \
if (n > 0) { \ if (n > 0) { \
View File
@ -31,25 +31,25 @@ extern "C" {
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))
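SS is ceiling division by 1 << shift, used throughout this file to size the subsampled-chroma copies for a ragged remainder. Worked values:

int r = 5;            // ragged remainder of luma pixels
int c444 = SS(r, 0);  // (5 + 0) >> 0 == 5: unsubsampled chroma
int c422 = SS(r, 1);  // (5 + 1) >> 1 == 3: half-width chroma rounds up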
// Any 4 planes to 1 with yuvconstants // Any 4 planes to 1 with yuvconstants
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ #define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, \ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
const uint8_t* a_buf, uint8_t* dst_ptr, \ const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \ const struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 5]); \ SIMD_ALIGNED(uint8_t temp[64 * 5]); \
memset(temp, 0, 64 * 4); /* for msan */ \ memset(temp, 0, 64 * 4); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \ ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
} \ } \
memcpy(temp, y_buf + n, r); \ memcpy(temp, y_buf + n, r); \
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 192, a_buf + n, r); \ memcpy(temp + 192, a_buf + n, r); \
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
yuvconstants, MASK + 1); \ yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
SS(r, DUVSHIFT) * BPP); \ SS(r, DUVSHIFT) * BPP); \
} }
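Every ANY macro in this file shares that shape, and it is easiest to read expanded. Roughly what ANY11 (further below) generates for a hypothetical 16-pixel single-plane kernel — the FooRow names are placeholders:

void FooRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) {
  SIMD_ALIGNED(uint8_t temp[128 * 2]);
  memset(temp, 0, 128);                // for msan
  int r = width & 15;                  // MASK == 15: ragged tail length
  int n = width & ~15;                 // aligned pixel count
  if (n > 0) {
    FooRow_NEON(src_ptr, dst_ptr, n);  // SIMD over the aligned part
  }
  memcpy(temp, src_ptr + n, r);        // stage the tail in scratch
  FooRow_NEON(temp, temp + 128, 16);   // always run MASK + 1 pixels
  memcpy(dst_ptr + n, temp + 128, r);  // keep only the r valid outputs
}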
#ifdef HAS_I422ALPHATOARGBROW_SSSE3 #ifdef HAS_I422ALPHATOARGBROW_SSSE3
@ -67,22 +67,22 @@ ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#undef ANY41C #undef ANY41C
// Any 3 planes to 1. // Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ #define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, \ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
uint8_t* dst_ptr, int width) { \ const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 4]); \ SIMD_ALIGNED(uint8_t temp[64 * 4]); \
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
} \ } \
memcpy(temp, y_buf + n, r); \ memcpy(temp, y_buf + n, r); \
memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
SS(r, DUVSHIFT) * BPP); \ SS(r, DUVSHIFT) * BPP); \
} }
// Merge functions. // Merge functions.
@ -120,10 +120,10 @@ ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
// on arm that subsamples 444 to 422 internally. // on arm that subsamples 444 to 422 internally.
// Any 3 planes to 1 with yuvconstants // Any 3 planes to 1 with yuvconstants
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ #define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, \ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \ const uint8_t* v_buf, uint8_t* dst_ptr, \
int width) { \ const struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 4]); \ SIMD_ALIGNED(uint8_t temp[64 * 4]); \
memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
@ -199,22 +199,23 @@ ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
// Any 3 planes of 16 bit to 1 with yuvconstants // Any 3 planes of 16 bit to 1 with yuvconstants
// TODO(fbarchard): consider sharing this code with ANY31C // TODO(fbarchard): consider sharing this code with ANY31C
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ #define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, uint8_t* dst_ptr, \ void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
const struct YuvConstants* yuvconstants, int width) { \ uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
SIMD_ALIGNED(T temp[16 * 3]); \ int width) { \
SIMD_ALIGNED(uint8_t out[64]); \ SIMD_ALIGNED(T temp[16 * 3]); \
memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ SIMD_ALIGNED(uint8_t out[64]); \
int r = width & MASK; \ memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \
int n = width & ~MASK; \ int r = width & MASK; \
if (n > 0) { \ int n = width & ~MASK; \
ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ if (n > 0) { \
} \ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
memcpy(temp, y_buf + n, r * SBPP); \ } \
memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ memcpy(temp, y_buf + n, r * SBPP); \
memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \
} }
#ifdef HAS_I210TOAR30ROW_SSSE3 #ifdef HAS_I210TOAR30ROW_SSSE3
@ -229,21 +230,21 @@ ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#undef ANY31CT #undef ANY31CT
// Any 2 planes to 1. // Any 2 planes to 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
int width) { \ int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \ memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
} \ } \
memcpy(temp, y_buf + n * SBPP, r * SBPP); \ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \ SS(r, UVSHIFT) * SBPP2); \
ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
} }
// Merge functions. // Merge functions.
@ -327,21 +328,21 @@ ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#undef ANY21 #undef ANY21
// Any 2 planes to 1 with yuvconstants // Any 2 planes to 1 with yuvconstants
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ #define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \ const struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \ memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
} \ } \
memcpy(temp, y_buf + n * SBPP, r * SBPP); \ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
SS(r, UVSHIFT) * SBPP2); \ SS(r, UVSHIFT) * SBPP2); \
ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \ ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
} }
// Biplanar to RGB. // Biplanar to RGB.
@ -385,8 +386,8 @@ ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
// Any 1 to 1. // Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ #define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 2]); \ SIMD_ALIGNED(uint8_t temp[128 * 2]); \
memset(temp, 0, 128); /* for YUY2 and msan */ \ memset(temp, 0, 128); /* for YUY2 and msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
@ -640,8 +641,8 @@ ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
// Any 1 to 1 blended. Destination is read, modify, write. // Any 1 to 1 blended. Destination is read, modify, write.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 2]); \ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
memset(temp, 0, 64 * 2); /* for msan */ \ memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
@ -669,18 +670,18 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#undef ANY11B #undef ANY11B
// Any 1 to 1 with parameter. // Any 1 to 1 with parameter.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 2]); \ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
memset(temp, 0, 64); /* for msan */ \ memset(temp, 0, 64); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, param, n); \ ANY_SIMD(src_ptr, dst_ptr, param, n); \
} \ } \
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
ANY_SIMD(temp, temp + 64, param, MASK + 1); \ ANY_SIMD(temp, temp + 64, param, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \
} }
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2) #if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
@ -755,29 +756,48 @@ ANY11C(Convert16To8Row_Any_SSSE3,
15) 15)
#endif #endif
#ifdef HAS_CONVERT16TO8ROW_AVX2 #ifdef HAS_CONVERT16TO8ROW_AVX2
ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16_t, uint8_t, 31) ANY11C(Convert16To8Row_Any_AVX2,
Convert16To8Row_AVX2,
2,
1,
uint16_t,
uint8_t,
31)
#endif #endif
#ifdef HAS_CONVERT8TO16ROW_SSE2 #ifdef HAS_CONVERT8TO16ROW_SSE2
ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8_t, uint16_t, 15) ANY11C(Convert8To16Row_Any_SSE2,
Convert8To16Row_SSE2,
1,
2,
uint8_t,
uint16_t,
15)
#endif #endif
#ifdef HAS_CONVERT8TO16ROW_AVX2 #ifdef HAS_CONVERT8TO16ROW_AVX2
ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8_t, uint16_t, 31) ANY11C(Convert8To16Row_Any_AVX2,
Convert8To16Row_AVX2,
1,
2,
uint8_t,
uint16_t,
31)
#endif #endif
#undef ANY11C #undef ANY11C
// Any 1 to 1 with parameter, operating on shorts. BPP is measured in shorts. // Any 1 to 1 with parameter, operating on shorts. BPP is measured in shorts.
#define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ #define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
void NAMEANY(const uint16_t* src_ptr, uint16_t* dst_ptr, T param, int width) { \ void NAMEANY(const uint16_t* src_ptr, uint16_t* dst_ptr, T param, \
SIMD_ALIGNED(uint16_t temp[32 * 2]); \ int width) { \
memset(temp, 0, 64); /* for msan */ \ SIMD_ALIGNED(uint16_t temp[32 * 2]); \
int r = width & MASK; \ memset(temp, 0, 64); /* for msan */ \
int n = width & ~MASK; \ int r = width & MASK; \
if (n > 0) { \ int n = width & ~MASK; \
ANY_SIMD(src_ptr, dst_ptr, param, n); \ if (n > 0) { \
} \ ANY_SIMD(src_ptr, dst_ptr, param, n); \
memcpy(temp, src_ptr + n, r * SBPP); \ } \
ANY_SIMD(temp, temp + 16, param, MASK + 1); \ memcpy(temp, src_ptr + n, r * SBPP); \
memcpy(dst_ptr + n, temp + 16, r * BPP); \ ANY_SIMD(temp, temp + 16, param, MASK + 1); \
memcpy(dst_ptr + n, temp + 16, r * BPP); \
} }
#ifdef HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_SSE2
@ -801,9 +821,9 @@ ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, float, 2, 2, 31)
// Any 1 to 1 with yuvconstants // Any 1 to 1 with yuvconstants
#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \
const struct YuvConstants* yuvconstants, int width) { \ const struct YuvConstants* yuvconstants, int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 2]); \ SIMD_ALIGNED(uint8_t temp[128 * 2]); \
memset(temp, 0, 128); /* for YUY2 and msan */ \ memset(temp, 0, 128); /* for YUY2 and msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
@ -833,20 +853,20 @@ ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7)
#undef ANY11C #undef ANY11C
// Any 1 to 1 interpolate. Takes 2 rows of source via stride. // Any 1 to 1 interpolate. Takes 2 rows of source via stride.
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ #define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, \ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \
int width, int source_y_fraction) { \ ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \
SIMD_ALIGNED(uint8_t temp[64 * 3]); \ SIMD_ALIGNED(uint8_t temp[64 * 3]); \
memset(temp, 0, 64 * 2); /* for msan */ \ memset(temp, 0, 64 * 2); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \
} \ } \
memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \
memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \
ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
} }
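The ANY11T contract as a scalar sketch: blend two rows one stride apart with an 8-bit fraction, 0 selecting the first row. The +128 rounding term is a reasonable choice for the sketch, not quoted from the C kernel:

static void InterpolateRowScalar(uint8_t* dst, const uint8_t* src,
                                 ptrdiff_t stride, int width, int fraction) {
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] * (256 - fraction) +
                        src[x + stride] * fraction + 128) >> 8);
  }
}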
#ifdef HAS_INTERPOLATEROW_AVX2 #ifdef HAS_INTERPOLATEROW_AVX2
@ -865,8 +885,8 @@ ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31)
// Any 1 to 1 mirror. // Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t temp[64 * 2]); \ SIMD_ALIGNED(uint8_t temp[64 * 2]); \
memset(temp, 0, 64); /* for msan */ \ memset(temp, 0, 64); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
@ -905,16 +925,16 @@ ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15)
#undef ANY11M #undef ANY11M
// Any 1 plane. (memset) // Any 1 plane. (memset)
#define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ #define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \
void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \ void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \
SIMD_ALIGNED(uint8_t temp[64]); \ SIMD_ALIGNED(uint8_t temp[64]); \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(dst_ptr, v32, n); \ ANY_SIMD(dst_ptr, v32, n); \
} \ } \
ANY_SIMD(temp, v32, MASK + 1); \ ANY_SIMD(temp, v32, MASK + 1); \
memcpy(dst_ptr + n * BPP, temp, r * BPP); \ memcpy(dst_ptr + n * BPP, temp, r * BPP); \
} }
#ifdef HAS_SETROW_X86 #ifdef HAS_SETROW_X86
@ -932,19 +952,20 @@ ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3)
#undef ANY1 #undef ANY1
// Any 1 to 2. Outputs UV planes. // Any 1 to 2. Outputs UV planes.
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ #define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width) { \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
SIMD_ALIGNED(uint8_t temp[128 * 3]); \ int width) { \
memset(temp, 0, 128); /* for msan */ \ SIMD_ALIGNED(uint8_t temp[128 * 3]); \
int r = width & MASK; \ memset(temp, 0, 128); /* for msan */ \
int n = width & ~MASK; \ int r = width & MASK; \
if (n > 0) { \ int n = width & ~MASK; \
ANY_SIMD(src_ptr, dst_u, dst_v, n); \ if (n > 0) { \
} \ ANY_SIMD(src_ptr, dst_u, dst_v, n); \
memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ } \
ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \
memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \
memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \
memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \
} }
#ifdef HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_SSE2
@ -983,21 +1004,21 @@ ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31)
#undef ANY12 #undef ANY12
// Any 1 to 3. Outputs RGB planes. // Any 1 to 3. Outputs RGB planes.
#define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ #define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \
int width) { \ uint8_t* dst_b, int width) { \
SIMD_ALIGNED(uint8_t temp[16 * 6]); \ SIMD_ALIGNED(uint8_t temp[16 * 6]); \
memset(temp, 0, 16 * 3); /* for msan */ \ memset(temp, 0, 16 * 3); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
if (n > 0) { \ if (n > 0) { \
ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \
} \ } \
memcpy(temp, src_ptr + n * BPP, r * BPP); \ memcpy(temp, src_ptr + n * BPP, r * BPP); \
ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \ ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \
memcpy(dst_r + n, temp + 16 * 3, r); \ memcpy(dst_r + n, temp + 16 * 3, r); \
memcpy(dst_g + n, temp + 16 * 4, r); \ memcpy(dst_g + n, temp + 16 * 4, r); \
memcpy(dst_b + n, temp + 16 * 5, r); \ memcpy(dst_b + n, temp + 16 * 5, r); \
} }
#ifdef HAS_SPLITRGBROW_SSSE3 #ifdef HAS_SPLITRGBROW_SSSE3
@ -1010,9 +1031,9 @@ ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15)
// Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes.
// 128 byte row allows for 32 avx ARGB pixels. // 128 byte row allows for 32 avx ARGB pixels.
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ #define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \ void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \
uint8_t* dst_v, int width) { \ uint8_t* dst_v, int width) { \
SIMD_ALIGNED(uint8_t temp[128 * 4]); \ SIMD_ALIGNED(uint8_t temp[128 * 4]); \
memset(temp, 0, 128 * 2); /* for msan */ \ memset(temp, 0, 128 * 2); /* for msan */ \
int r = width & MASK; \ int r = width & MASK; \
int n = width & ~MASK; \ int n = width & ~MASK; \
View File
@ -10,8 +10,8 @@
#include "libyuv/row.h" #include "libyuv/row.h"
#include <string.h> // For memcpy and memset.
#include <stdio.h> #include <stdio.h>
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
@ -125,7 +125,9 @@ void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
} }
} }
void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
uint8_t* dst_argb,
int width) {
int x; int x;
for (x = 0; x < width; ++x) { for (x = 0; x < width; ++x) {
uint8_t b = src_rgb565[0] & 0x1f; uint8_t b = src_rgb565[0] & 0x1f;
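Completing one pixel of the extraction that the hunk above begins: widen each 565 field by replicating its top bits, so a full-scale field maps exactly to 255 (a sketch following the visible blue-channel code):

uint16_t p = (uint16_t)(src_rgb565[0] | (src_rgb565[1] << 8));
uint8_t b5 = p & 0x1f;
uint8_t g6 = (p >> 5) & 0x3f;
uint8_t r5 = (p >> 11) & 0x1f;
dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));  // 0x1f -> 0xff
dst_argb[1] = (uint8_t)((g6 << 2) | (g6 >> 4));  // 0x3f -> 0xff
dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
dst_argb[3] = 255u;  // opaque alpha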
@ -291,7 +293,7 @@ void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t r1 = src_argb[6] >> 3; uint8_t r1 = src_argb[6] >> 3;
uint8_t a1 = src_argb[7] >> 7; uint8_t a1 = src_argb[7] >> 7;
*(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
(b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
dst_rgb += 4; dst_rgb += 4;
src_argb += 8; src_argb += 8;
} }
@ -315,8 +317,8 @@ void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
uint8_t g1 = src_argb[5] >> 4; uint8_t g1 = src_argb[5] >> 4;
uint8_t r1 = src_argb[6] >> 4; uint8_t r1 = src_argb[6] >> 4;
uint8_t a1 = src_argb[7] >> 4; uint8_t a1 = src_argb[7] >> 4;
*(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | (b1 << 16) | *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
(g1 << 20) | (r1 << 24) | (a1 << 28); (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
dst_rgb += 4; dst_rgb += 4;
src_argb += 8; src_argb += 8;
} }
@ -354,43 +356,43 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
} }
// ARGBToY_C and ARGBToUV_C // ARGBToY_C and ARGBToUV_C
#define MAKEROWY(NAME, R, G, B, BPP) \ #define MAKEROWY(NAME, R, G, B, BPP) \
void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
int x; \ int x; \
for (x = 0; x < width; ++x) { \ for (x = 0; x < width; ++x) { \
dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \
src_argb0 += BPP; \ src_argb0 += BPP; \
dst_y += 1; \ dst_y += 1; \
} \ } \
} \ } \
void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \
uint8_t* dst_u, uint8_t* dst_v, int width) { \ uint8_t* dst_u, uint8_t* dst_v, int width) { \
const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \
int x; \ int x; \
for (x = 0; x < width - 1; x += 2) { \ for (x = 0; x < width - 1; x += 2) { \
uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \ uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \
src_rgb1[B + BPP]) >> \ src_rgb1[B + BPP]) >> \
2; \ 2; \
uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \ uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \
src_rgb1[G + BPP]) >> \ src_rgb1[G + BPP]) >> \
2; \ 2; \
uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \ uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \
src_rgb1[R + BPP]) >> \ src_rgb1[R + BPP]) >> \
2; \ 2; \
dst_u[0] = RGBToU(ar, ag, ab); \ dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \ dst_v[0] = RGBToV(ar, ag, ab); \
src_rgb0 += BPP * 2; \ src_rgb0 += BPP * 2; \
src_rgb1 += BPP * 2; \ src_rgb1 += BPP * 2; \
dst_u += 1; \ dst_u += 1; \
dst_v += 1; \ dst_v += 1; \
} \ } \
if (width & 1) { \ if (width & 1) { \
uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \
uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \
uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \
dst_u[0] = RGBToU(ar, ag, ab); \ dst_u[0] = RGBToU(ar, ag, ab); \
dst_v[0] = RGBToV(ar, ag, ab); \ dst_v[0] = RGBToV(ar, ag, ab); \
} \ } \
} }
MAKEROWY(ARGB, 2, 1, 0, 4) MAKEROWY(ARGB, 2, 1, 0, 4)
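MAKEROWY bottoms out in fixed-point BT.601 studio-range math. The luma step, written out under the assumption of libyuv's usual coefficients (66/129/25 with bias 0x1080, which folds in the +16 offset and +0.5 rounding, so black maps to 16 and white to 235):

static __inline int RGBToYExample(uint8_t r, uint8_t g, uint8_t b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}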
@@ -440,40 +442,40 @@ static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// ARGBToYJ_C and ARGBToUVJ_C
#define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                    \
    for (x = 0; x < width; ++x) {                                             \
      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                       \
      dst_y += 1;                                                             \
    }                                                                         \
  }                                                                           \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
    int x;                                                                    \
    for (x = 0; x < width - 1; x += 2) {                                      \
      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                       \
                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));          \
      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                       \
                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));          \
      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                       \
                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));          \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                         \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                         \
      src_rgb0 += BPP * 2;                                                    \
      src_rgb1 += BPP * 2;                                                    \
      dst_u += 1;                                                             \
      dst_v += 1;                                                             \
    }                                                                         \
    if (width & 1) {                                                          \
      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]);                            \
      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]);                            \
      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]);                            \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                         \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                         \
    }                                                                         \
  }

MAKEROWYJ(ARGB, 2, 1, 0, 4)
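AVGB is a rounding average, so the J (JPEG range) path averages each 2x2 block with rounding to nearest, unlike the truncating >> 2 used by MAKEROWY above. A small self-checking sketch:

#include <assert.h>

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

int main(void) {
  assert(AVGB(1, 2) == 2);  // rounds up, where (1 + 2) >> 1 would give 1
  assert(AVGB(AVGB(10, 11), AVGB(12, 14)) == 12);  // rounded 2x2 box average
  return 0;
}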
@@ -756,7 +758,9 @@ void ARGBColorMatrixRow_C(const uint8_t* src_argb,
}

// Apply color table to a row of image.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
@@ -772,7 +776,9 @@ void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width
}

// Apply color table to a row of image.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
@@ -1535,10 +1541,7 @@ void I210ToARGBRow_C(const uint16_t* src_y,
  }
}

static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // convert 10.6 to 10 bit.
  g = g >> 4;
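StoreAR30 receives channels in 10.6 fixed point, shifts them down to 10 bits, clamps, and packs a 2-bit alpha plus 10-bit R, G, B into one word. A sketch of just the packing step under those assumptions (clamping elided, alpha forced opaque, libyuv's AR30 layout with B in the low bits assumed):

#include <stdint.h>
#include <string.h>

static void StoreAR30Sketch(uint8_t* rgb_buf, int b, int g, int r) {
  // b, g, r assumed already clamped to [0, 1023].
  uint32_t ar30 = (uint32_t)b | ((uint32_t)g << 10) | ((uint32_t)r << 20) |
                  0xc0000000u;  // 2-bit alpha = 3 (opaque) in the top bits
  memcpy(rgb_buf, &ar30, 4);
}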
@@ -1577,7 +1580,6 @@ void I210ToAR30Row_C(const uint16_t* src_y,
  }
}
// 8 bit YUV to 10 bit AR30
// Uses same code as 10 bit YUV; bit shifts the 8 bit values up to 10 bits.
void I422ToAR30Row_C(const uint8_t* src_y,
@@ -1680,7 +1682,7 @@ void I422ToARGB4444Row_C(const uint8_t* src_y,
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
                                 (g1 << 20) | (r1 << 24) | 0xf000f000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
@@ -1718,7 +1720,7 @@ void I422ToARGB1555Row_C(const uint8_t* src_y,
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
                                 (g1 << 21) | (r1 << 26) | 0x80008000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
@@ -1954,7 +1956,10 @@ void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  }
}

void MirrorUVRow_C(const uint8_t* src_uv,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int x;
  src_uv += (width - 1) << 1;
  for (x = 0; x < width - 1; x += 2) {
@@ -1985,7 +1990,10 @@ void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  }
}

void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
@@ -2385,7 +2393,9 @@ const uint32_t fixed_invtbl8[256] = {
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T

void ARGBUnattenuateRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32_t b = src_argb[0];
@@ -2673,7 +2683,9 @@ void ARGBPolynomialRow_C(const uint8_t* src_argb,
// simply extract the low bits of the exponent and the high
// bits of the mantissa from our float and we're done.
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
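The loop body is elided by the diff, but the multiplier 1.9259299444e-34f is 2^-112, which rebiases the float exponent so the IEEE half-float bits land at a fixed position of the float's bit pattern. A standalone sketch of the trick (assumes the scaled value is in half range; the mantissa is truncated, not rounded):

#include <stdint.h>
#include <string.h>

static uint16_t ToHalfSketch(uint16_t v, float scale) {
  float f = (float)v * (1.9259299444e-34f * scale);  // 2^-112 * scale
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));                   // type-pun safely
  return (uint16_t)(bits >> 13);  // low exponent bits + high mantissa bits
}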


@@ -37,14 +37,14 @@ extern "C" {
}

// Load YUV 422 pixel data
#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v)    \
  {                                                                \
    uint64_t y_m;                                                  \
    uint32_t u_m, v_m;                                             \
    v4i32 zero_m = {0};                                            \
    y_m = LD(psrc_y);                                              \
    u_m = LW(psrc_u);                                              \
    v_m = LW(psrc_v);                                              \
    out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \
    out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m);        \
    out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m);        \
@@ -275,14 +275,14 @@
// Load I444 pixel data
#define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
  {                                                           \
    uint64_t y_m, u_m, v_m;                                   \
    v2i64 zero_m = {0};                                       \
    y_m = LD(psrc_y);                                         \
    u_m = LD(psrc_u);                                         \
    v_m = LD(psrc_v);                                         \
    out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m);   \
    out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m);   \
    out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m);   \
  }
void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) {
@@ -1014,7 +1014,9 @@ void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  }
}

void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
                           uint8_t* dst_rgb,
                           int width) {
  int x;
  v16u8 src0, src1, dst0;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
@@ -1054,7 +1056,9 @@ void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width)
  }
}

void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
                           uint8_t* dst_rgb,
                           int width) {
  int x;
  v16u8 src0, src1;
  v16u8 vec0, vec1;
@@ -1230,7 +1234,9 @@ void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
  }
}

void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  int x;
  v16u8 src0, src1, dst0, dst1;
  v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
@@ -1547,7 +1553,9 @@ void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
  }
}

void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
  v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@@ -1592,7 +1600,9 @@ void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width
  }
}

void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24,
                        uint8_t* dst_argb,
                        int width) {
  int x;
  v16u8 src0, src1, src2;
  v16u8 vec0, vec1, vec2;
@@ -1642,7 +1652,9 @@ void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  }
}

void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555,
                        uint8_t* dst_y,
                        int width) {
  int x;
  v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5;
  v8u16 reg0, reg1, reg2, reg3, reg4, reg5;
@@ -2969,7 +2981,9 @@ void MergeUVRow_MSA(const uint8_t* src_u,
  }
}

void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
                             uint8_t* dst_a,
                             int width) {
  int i;
  v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
@@ -3429,7 +3443,10 @@ void SobelYRow_MSA(const uint8_t* src_y0,
  }
}

void HalfFloatRow_MSA(const uint16_t* src,
                      uint16_t* dst,
                      float scale,
                      int width) {
  int i;
  v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
  v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;


@@ -694,7 +694,9 @@ void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
      : "cc", "memory", "r3", "q0");
}

void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
                         uint8_t* dst_argb,
                         int width) {
  asm volatile(
      "vmov.u8    d4, #255                       \n"  // Alpha
      "1:                                        \n"
@@ -756,7 +758,9 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  "vorr.u8    d2, d1, d5                     \n" /* R */ \
  "vorr.u8    d1, d4, d6                     \n" /* G */

void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
                          uint8_t* dst_argb,
                          int width) {
  asm volatile(
      "vmov.u8    d3, #255                       \n"  // Alpha
      "1:                                        \n"
@@ -848,7 +852,9 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
  );
}

void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
                         uint8_t* dst_rgb24,
                         int width) {
  asm volatile(
      "1:                                        \n"
      "vld4.8     {d1, d2, d3, d4}, [%0]!        \n"  // load 8 pixels of ARGB.
@@ -1070,7 +1076,9 @@ void I422ToUYVYRow_NEON(const uint8_t* src_y,
      : "cc", "memory", "d0", "d1", "d2", "d3");
}

void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
                          uint8_t* dst_rgb565,
                          int width) {
  asm volatile(
      "1:                                        \n"
      "vld4.8     {d20, d21, d22, d23}, [%0]!    \n"  // load 8 pixels of ARGB.
@@ -1166,7 +1174,9 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
      : "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}

void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
                              uint8_t* dst_a,
                              int width) {
  asm volatile(
      "1:                                        \n"
      "vld4.8     {d0, d2, d4, d6}, [%0]!        \n"  // load 8 ARGB pixels
@@ -1798,7 +1808,9 @@ void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
      : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}

void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
                         uint8_t* dst_y,
                         int width) {
  asm volatile(
      "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
      "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
@@ -1822,7 +1834,9 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width)
      : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13");
}

void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
                         uint8_t* dst_y,
                         int width) {
  asm volatile(
      "vmov.u8    d24, #13                       \n"  // B * 0.1016 coefficient
      "vmov.u8    d25, #65                       \n"  // G * 0.5078 coefficient
@@ -2081,7 +2095,9 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
}

// Attenuate 8 pixels at a time.
void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int width) {
  asm volatile(
      // Attenuate 8 pixels.
      "1:                                        \n"
@@ -2561,7 +2577,10 @@ void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float, int width) {
}

// TODO(fbarchard): multiply by element.
void HalfFloatRow_NEON(const uint16_t* src,
                       uint16_t* dst,
                       float scale,
                       int width) {
  asm volatile(
      "vdup.32    q0, %3                         \n"


@@ -733,7 +733,9 @@ void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
      : "cc", "memory", "v0");
}

void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
                         uint8_t* dst_argb,
                         int width) {
  asm volatile(
      "movi       v4.8b, #255                    \n"  // Alpha
      "1:                                        \n"
@@ -797,7 +799,9 @@ void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  "orr        v0.16b, v0.16b, v2.16b         \n" /* R,B */ \
  "dup        v2.2D, v0.D[1]                 \n" /* R */

void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
                          uint8_t* dst_argb,
                          int width) {
  asm volatile(
      "movi       v3.8b, #255                    \n"  // Alpha
      "1:                                        \n"
@@ -902,7 +906,9 @@ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
  );
}

void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
                         uint8_t* dst_rgb24,
                         int width) {
  asm volatile(
      "1:                                        \n"
      "ld4        {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n"  // load 8 ARGB
@@ -1126,7 +1132,9 @@ void I422ToUYVYRow_NEON(const uint8_t* src_y,
      : "cc", "memory", "v0", "v1", "v2", "v3");
}

void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
                          uint8_t* dst_rgb565,
                          int width) {
  asm volatile(
      "1:                                        \n"
      "ld4        {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n"  // load 8 pixels
@@ -1223,7 +1231,9 @@ void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}

void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
                              uint8_t* dst_a,
                              int width) {
  asm volatile(
      "1:                                        \n"
      "ld4        {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n"  // load row 16
@@ -1829,7 +1839,9 @@ void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
      "v27");
}

void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,
                         uint8_t* dst_y,
                         int width) {
  asm volatile(
      "movi       v4.8b, #13                     \n"  // B * 0.1016 coefficient
      "movi       v5.8b, #65                     \n"  // G * 0.5078 coefficient
@@ -1853,7 +1865,9 @@ void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width)
      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
}

void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,
                         uint8_t* dst_y,
                         int width) {
  asm volatile(
      "movi       v24.8b, #13                    \n"  // B * 0.1016 coefficient
      "movi       v25.8b, #65                    \n"  // G * 0.5078 coefficient
@@ -2121,7 +2135,9 @@ void ARGBBlendRow_NEON(const uint8_t* src_argb0,
}

// Attenuate 8 pixels at a time.
void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int width) {
  asm volatile(
      // Attenuate 8 pixels.
      "1:                                        \n"
@@ -2604,7 +2620,10 @@ void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float, int width) {
      : "cc", "memory", "v1", "v2", "v3");
}

void HalfFloatRow_NEON(const uint16_t* src,
                       uint16_t* dst,
                       float scale,
                       int width) {
  asm volatile(
      "1:                                        \n"
      "ld1        {v1.16b}, [%0], #16            \n"  // load 8 shorts


@@ -28,27 +28,27 @@ extern "C" {
#if defined(_M_X64)

// Read 4 UV from 422, upsample to 8 UV.
#define READYUV422                                        \
  xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf);            \
  xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                   \
  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                  \
  u_buf += 4;                                             \
  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);                \
  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                   \
  y_buf += 8;

// Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
#define READYUVA422                                       \
  xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf);            \
  xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \
  xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);                   \
  xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);                  \
  u_buf += 4;                                             \
  xmm4 = _mm_loadl_epi64((__m128i*)y_buf);                \
  xmm4 = _mm_unpacklo_epi8(xmm4, xmm4);                   \
  y_buf += 8;                                             \
  xmm5 = _mm_loadl_epi64((__m128i*)a_buf);                \
  a_buf += 8;
// Convert 8 pixels: 8 UV and 8 Y.
@@ -3022,7 +3022,9 @@ __declspec(naked) void MirrorRow_SSSE3(const uint8_t* src,
#endif  // HAS_MIRRORROW_SSSE3

#ifdef HAS_MIRRORROW_AVX2
__declspec(naked) void MirrorRow_AVX2(const uint8_t* src,
                                      uint8_t* dst,
                                      int width) {
  __asm {
    mov        eax, [esp + 4]  // src
    mov        edx, [esp + 8]  // dst
@@ -3274,7 +3276,9 @@ __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u,
#ifdef HAS_COPYROW_SSE2
// CopyRow copies 'count' bytes using a 16 byte load/store, 32 bytes at a time.
__declspec(naked) void CopyRow_SSE2(const uint8_t* src,
                                    uint8_t* dst,
                                    int count) {
  __asm {
    mov        eax, [esp + 4]  // src
    mov        edx, [esp + 8]  // dst
@@ -3311,7 +3315,9 @@ __declspec(naked) void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int count)
#endif  // HAS_COPYROW_SSE2

#ifdef HAS_COPYROW_AVX
// CopyRow copies 'count' bytes using a 32 byte load/store, 64 bytes at a time.
__declspec(naked) void CopyRow_AVX(const uint8_t* src,
                                   uint8_t* dst,
                                   int count) {
  __asm {
    mov        eax, [esp + 4]  // src
    mov        edx, [esp + 8]  // dst
@@ -3334,7 +3340,9 @@ __declspec(naked) void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int count)
#endif  // HAS_COPYROW_AVX

// Multiple of 1.
__declspec(naked) void CopyRow_ERMS(const uint8_t* src,
                                    uint8_t* dst,
                                    int count) {
  __asm {
    mov        eax, esi
    mov        edx, edi
@@ -3582,7 +3590,9 @@ __declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int count) {
}

// Write 'count' 32 bit values.
__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb,
                                      uint32_t v32,
                                      int count) {
  __asm {
    mov        edx, edi
    mov        edi, [esp + 4]  // dst


@@ -815,8 +815,8 @@ static void ScalePlaneBox(int src_width,
                           const uint16_t* src_ptr, uint8_t* dst_ptr) =
      (dx & 0xffff) ? ScaleAddCols2_C
                    : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
  void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
                      int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleAddRow = ScaleAddRow_Any_SSE2;
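This is libyuv's usual dispatch shape: a function pointer starts at the portable C row function and is upgraded when TestCpuFlag reports the feature, with the _Any_ wrapper covering widths that are not a SIMD multiple. A condensed sketch of the pattern (structure only, surrounding code elided):

  void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
                      int src_width) = ScaleAddRow_C;
#if defined(HAS_SCALEADDROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleAddRow = ScaleAddRow_Any_SSE2;  // SIMD body plus C tail
    if (IS_ALIGNED(src_width, 16)) {
      ScaleAddRow = ScaleAddRow_SSE2;    // pure SIMD when the width allows
    }
  }
#endif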
@@ -895,8 +895,8 @@ static void ScalePlaneBox_16(int src_width,
  void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
                       const uint32_t* src_ptr, uint16_t* dst_ptr) =
      (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
  void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
                      int src_width) = ScaleAddRow_16_C;
#if defined(HAS_SCALEADDROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
@@ -946,8 +946,8 @@ void ScalePlaneBilinearDown(int src_width,
  const int max_y = (src_height - 1) << 16;
  int j;
  void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
@@ -1144,8 +1144,8 @@ void ScalePlaneBilinearUp(int src_width,
  void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
@@ -1401,8 +1401,8 @@ static void ScalePlaneSimple(int src_width,
                             const uint8_t* src_ptr,
                             uint8_t* dst_ptr) {
  int i;
  void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
                    int x, int dx) = ScaleCols_C;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
@@ -1759,8 +1759,9 @@ int ScaleOffset(const uint8_t* src,
  uint8_t* dst_y = dst + dst_yoffset_even * dst_width;
  uint8_t* dst_u =
      dst + dst_width * dst_height + (dst_yoffset_even >> 1) * dst_halfwidth;
  uint8_t* dst_v = dst + dst_width * dst_height +
                   dst_halfwidth * dst_halfheight +
                   (dst_yoffset_even >> 1) * dst_halfwidth;
  if (!src || src_width <= 0 || src_height <= 0 || !dst || dst_width <= 0 ||
      dst_height <= 0 || dst_yoffset_even < 0 ||
      dst_yoffset_even >= dst_height) {


@@ -19,15 +19,15 @@ extern "C" {
#endif

// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK)                            \
  void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
               int dx) {                                                       \
    int r = dst_width & MASK;                                                  \
    int n = dst_width & ~MASK;                                                 \
    if (n > 0) {                                                               \
      TERP_SIMD(dst_ptr, src_ptr, n, x, dx);                                   \
    }                                                                          \
    TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx);                     \
  }
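The MASK split is plain bit arithmetic: n is the largest multiple of the SIMD width (MASK + 1) not exceeding dst_width, r the leftover columns for the C fallback, which resumes at x + n * dx. A worked check:

#include <assert.h>

int main(void) {
  const int kMask = 15;          // SIMD width of 16 (kMask + 1)
  int dst_width = 100;
  int n = dst_width & ~kMask;    // 96 pixels via SIMD
  int r = dst_width & kMask;     // 4 pixels via C
  assert(n == 96 && r == 4 && n + r == dst_width);
  return 0;
}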
#ifdef HAS_SCALEFILTERCOLS_NEON
@@ -60,31 +60,31 @@ CANY(ScaleARGBFilterCols_Any_MSA,
// Fixed scale down.
// Mask may be non-power of 2, so use MOD
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)    \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr,  \
               int dst_width) {                                                 \
    int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */           \
    int n = dst_width - r;                                                      \
    if (n > 0) {                                                                \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n);                       \
    }                                                                           \
    SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride,                    \
                   dst_ptr + n * BPP, r);                                       \
  }

// Fixed scale down for odd source width. Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)    \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr,  \
               int dst_width) {                                                 \
    int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */     \
    int n = (dst_width - 1) - r;                                                \
    if (n > 0) {                                                                \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n);                       \
    }                                                                           \
    SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride,                    \
                   dst_ptr + n * BPP, r + 1);                                   \
  }
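SDODD's arithmetic, worked through: with an odd source width the destination is (width + 1) / 2 wide and the final column only covers one source pixel, so the SIMD body stops one output early and the C fallback writes r + 1 pixels. For example:

#include <assert.h>

int main(void) {
  const int kMask = 15;
  int width = 129;                        // odd source width
  int dst_width = (width + 1) / 2;        // 65, as I420Blend subsampling uses
  int r = (dst_width - 1) % (kMask + 1);  // 0
  int n = (dst_width - 1) - r;            // 64 outputs via SIMD
  assert(n == 64 && r == 0);              // C then writes r + 1 = 1 pixel
  return 0;
}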
#ifdef HAS_SCALEROWDOWN2_SSSE3
@@ -385,16 +385,16 @@ SDANY(ScaleARGBRowDown2Box_Any_MSA,
#undef SDANY

// Scale down by even scale factor.
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK)         \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx,   \
               uint8_t* dst_ptr, int dst_width) {                             \
    int r = dst_width & MASK;                                                 \
    int n = dst_width & ~MASK;                                                \
    if (n > 0) {                                                              \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n);          \
    }                                                                         \
    SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx,    \
                   dst_ptr + n * BPP, r);                                     \
  }

#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
@@ -435,13 +435,13 @@ SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
#endif

// Add rows box filter scale down.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)                 \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {    \
    int n = src_width & ~MASK;                                                \
    if (n > 0) {                                                              \
      SCALEADDROW_SIMD(src_ptr, dst_ptr, n);                                  \
    }                                                                         \
    SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK);                \
  }

#ifdef HAS_SCALEADDROW_SSE2


@@ -772,8 +772,8 @@ static void ScaleARGBSimple(int src_width,
                            int y,
                            int dy) {
  int j;
  void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
                        int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
  (void)src_height;
#if defined(HAS_SCALEARGBCOLS_SSE2)


@@ -758,7 +758,9 @@ void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  }
}

void ScaleAddRow_16_C(const uint16_t* src_ptr,
                      uint32_t* dst_ptr,
                      int src_width) {
  int x;
  assert(src_width > 0);
  for (x = 0; x < src_width - 1; x += 2) {


@@ -807,9 +807,9 @@ void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
      "subs       %w2, %w2, #4                   \n"  // 4 pixels per loop.
      "st1        {v0.16b}, [%1], #16            \n"
      "b.gt       1b                             \n"
      : "+r"(src_argb),                // %0
        "+r"(dst_argb),                // %1
        "+r"(dst_width)                // %2
      : "r"((int64_t)(src_stepx * 4))  // %3
      : "memory", "cc", "v0");
}
@@ -851,10 +851,10 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb,
      "subs       %w3, %w3, #4                   \n"  // 4 pixels per loop.
      "st1        {v0.16b}, [%2], #16            \n"
      "b.gt       1b                             \n"
      : "+r"(src_argb),                // %0
        "+r"(src_stride),              // %1
        "+r"(dst_argb),                // %2
        "+r"(dst_width)                // %3
      : "r"((int64_t)(src_stepx * 4))  // %4
      : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16");
}


@@ -15,14 +15,13 @@ namespace libyuv {
extern "C" {
#endif

struct FourCCAliasEntry {
  uint32_t alias;
  uint32_t canonical;
};

#define NUM_ALIASES 18
static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = {
    {FOURCC_IYUV, FOURCC_I420},
    {FOURCC_YU12, FOURCC_I420},
    {FOURCC_YU16, FOURCC_I422},
@@ -48,7 +47,7 @@ static const struct FourCCAliasEntry kFourCCAliases[] = {
LIBYUV_API
uint32_t CanonicalFourCC(uint32_t fourcc) {
  int i;
  for (i = 0; i < NUM_ALIASES; ++i) {
    if (kFourCCAliases[i].alias == fourcc) {
      return kFourCCAliases[i].canonical;
    }
  }
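CanonicalFourCC is a straight table lookup over the aliases above: unknown codes fall through unchanged. A hedged usage sketch (written as it would appear inside namespace libyuv):

// Both common spellings of 4:2:0 planar resolve to the canonical code.
uint32_t a = CanonicalFourCC(FOURCC_IYUV);  // returns FOURCC_I420
uint32_t b = CanonicalFourCC(FOURCC_YU12);  // returns FOURCC_I420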


@@ -63,10 +63,10 @@ namespace libyuv {
                                                                             \
  /* The test is overall for color conversion matrix being reversible, so */ \
  /* this initializes the pixel with 2x2 blocks to eliminate subsampling. */ \
  uint8_t* p = orig_y;                                                       \
  for (int y = 0; y < benchmark_height_ - HS1; y += HS) {                    \
    for (int x = 0; x < benchmark_width_ - 1; x += 2) {                      \
      uint8_t r = static_cast<uint8_t>(fastrand());                          \
      p[0] = r;                                                              \
      p[1] = r;                                                              \
      p[HN] = r;                                                             \
@@ -74,7 +74,7 @@ namespace libyuv {
      p += 2;                                                                \
    }                                                                        \
    if (benchmark_width_ & 1) {                                              \
      uint8_t r = static_cast<uint8_t>(fastrand());                          \
      p[0] = r;                                                              \
      p[HN] = r;                                                             \
      p += 1;                                                                \
@@ -83,13 +83,13 @@ namespace libyuv {
    }                                                                        \
    if ((benchmark_height_ & 1) && HS == 2) {                                \
      for (int x = 0; x < benchmark_width_ - 1; x += 2) {                    \
        uint8_t r = static_cast<uint8_t>(fastrand());                        \
        p[0] = r;                                                            \
        p[1] = r;                                                            \
        p += 2;                                                              \
      }                                                                      \
      if (benchmark_width_ & 1) {                                            \
        uint8_t r = static_cast<uint8_t>(fastrand());                        \
        p[0] = r;                                                            \
        p += 1;                                                              \
      }                                                                      \


@@ -22,7 +22,9 @@
namespace libyuv {

// hash seed of 5381 recommended.
static uint32_t ReferenceHashDjb2(const uint8_t* src,
                                  uint64_t count,
                                  uint32_t seed) {
  uint32_t hash = seed;
  if (count > 0) {
    do {
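The loop body is cut off by the diff, but the reference implementation is the classic DJB2 recurrence, hash = hash * 33 + byte, seeded with 5381 as the comment notes. An equivalent compact sketch:

#include <stdint.h>

static uint32_t HashDjb2Sketch(const uint8_t* src, uint64_t count,
                               uint32_t seed) {
  uint32_t hash = seed;  // 5381 by convention
  for (uint64_t i = 0; i < count; ++i) {
    hash = hash * 33 + src[i];  // DJB2 step
  }
  return hash;
}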


@@ -173,8 +173,8 @@ TESTPLANARTOP(H420, uint8_t, 1, 2, 2, H010, uint16_t, 2, 2, 2)
                          SUBSAMPLE(kHeight, SUBSAMP_Y));                      \
    align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) *            \
                                         SUBSAMPLE(kHeight, SUBSAMP_Y));       \
    uint8_t* src_u = src_uv + OFF_U;                                           \
    uint8_t* src_v = src_uv + (PIXEL_STRIDE == 1 ? kSizeUV : OFF_V);           \
    int src_stride_uv = SUBSAMPLE(kWidth, SUBSAMP_X) * PIXEL_STRIDE;           \
    for (int i = 0; i < kHeight; ++i)                                          \
      for (int j = 0; j < kWidth; ++j)                                         \
@@ -2016,56 +2016,57 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
#endif  // HAS_ARGBTOAR30ROW_AVX2

// TODO(fbarchard): Fix clamping issue affected by U channel.
#define TESTPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B,       \
                         ALIGN, YALIGN, W1280, DIFF, N, NEG, SOFF, DOFF)       \
  TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) {                        \
    const int kWidth = ((W1280) > 0) ? (W1280) : 1;                            \
    const int kHeight = ALIGNINT(benchmark_height_, YALIGN);                   \
    const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN);                      \
    const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X);                        \
    const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y);             \
    const int kBpc = 2;                                                        \
    align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF);                \
    align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF);                        \
    align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF);                        \
    align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF);               \
    align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF);             \
    for (int i = 0; i < kWidth * kHeight; ++i) {                               \
      reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & 0x3ff);     \
    }                                                                          \
    for (int i = 0; i < kSizeUV; ++i) {                                        \
      reinterpret_cast<uint16_t*>(src_u + SOFF)[i] = (fastrand() & 0x3ff);     \
      reinterpret_cast<uint16_t*>(src_v + SOFF)[i] = (fastrand() & 0x3ff);     \
    }                                                                          \
    memset(dst_argb_c + DOFF, 1, kStrideB * kHeight);                          \
    memset(dst_argb_opt + DOFF, 101, kStrideB * kHeight);                      \
    MaskCpuFlags(disable_cpu_flags_);                                          \
    FMT_PLANAR##To##FMT_B(                                                     \
        reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,                     \
        reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV,                  \
        reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV,                  \
        dst_argb_c + DOFF, kStrideB, kWidth, NEG kHeight);                     \
    MaskCpuFlags(benchmark_cpu_info_);                                         \
    for (int i = 0; i < benchmark_iterations_; ++i) {                          \
      FMT_PLANAR##To##FMT_B(                                                   \
          reinterpret_cast<uint16_t*>(src_y + SOFF), kWidth,                   \
          reinterpret_cast<uint16_t*>(src_u + SOFF), kStrideUV,                \
          reinterpret_cast<uint16_t*>(src_v + SOFF), kStrideUV,                \
          dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight);                 \
    }                                                                          \
    int max_diff = 0;                                                          \
    for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) {                       \
      int abs_diff = abs(static_cast<int>(dst_argb_c[i + DOFF]) -              \
                         static_cast<int>(dst_argb_opt[i + DOFF]));            \
      if (abs_diff > max_diff) {                                               \
        max_diff = abs_diff;                                                   \
      }                                                                        \
    }                                                                          \
    EXPECT_LE(max_diff, DIFF);                                                 \
    free_aligned_buffer_page_end(src_y);                                       \
    free_aligned_buffer_page_end(src_u);                                       \
    free_aligned_buffer_page_end(src_v);                                       \
    free_aligned_buffer_page_end(dst_argb_c);                                  \
    free_aligned_buffer_page_end(dst_argb_opt);                                \
  }

#define TESTPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \


@@ -2061,8 +2061,8 @@ int TestHalfFloatPlane(int benchmark_width,
  MaskCpuFlags(disable_cpu_flags);
  for (j = 0; j < benchmark_iterations; j++) {
    HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
                   reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
                   scale, benchmark_width, benchmark_height);
  }

  // Enable optimizations.
@@ -2075,8 +2075,9 @@
  int max_diff = 0;
  for (i = 0; i < y_plane_size / 2; ++i) {
    int abs_diff =
        abs(static_cast<int>(reinterpret_cast<uint16_t*>(dst_c)[i]) -
            static_cast<int>(reinterpret_cast<uint16_t*>(dst_opt)[i]));
    if (abs_diff > max_diff) {
      max_diff = abs_diff;
    }
@@ -2788,8 +2789,9 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) {
  MaskCpuFlags(disable_cpu_flags_);
  Convert8To16Plane(src_pixels_y, benchmark_width_,
                    reinterpret_cast<uint16_t*>(dst_pixels_y_c),
                    benchmark_width_, 1024, benchmark_width_,
                    benchmark_height_);
  MaskCpuFlags(benchmark_cpu_info_);
  for (int i = 0; i < benchmark_iterations_; ++i) {
@@ -3214,8 +3216,9 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
    EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  }
  EXPECT_EQ(dst_pixels_c[0],
            static_cast<uint32_t>(0 * 1 + 640 * 4 + 640 * 2 * 6 + 640 * 3 * 4 +
                                  640 * 4 * 1));
  EXPECT_EQ(dst_pixels_c[639], static_cast<uint32_t>(30704));
}
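A worked check of the GaussCol expectation above: the column filter weights are 1-4-6-4-1, and column 0 of the test input holds 0, 640, 1280, 1920, 2560, so the output is
0 * 1 + 640 * 4 + 1280 * 6 + 1920 * 4 + 2560 * 1 = 2560 + 7680 + 7680 + 2560 = 20480,
which is the value the static_cast<uint32_t>(...) expression evaluates to.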


@@ -48,7 +48,8 @@ static int ARGBTestFilter(int src_width,
  }
  MemRandomize(src_argb, src_argb_plane_size);

  int64_t dst_argb_plane_size =
      (dst_width + b * 2) * (dst_height + b * 2) * 4LL;
  int dst_stride_argb = (b * 2 + dst_width) * 4;
  align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
@@ -310,6 +311,7 @@ TEST_SCALETO(ARGBScale, 1280, 720)
#undef TEST_SCALETO

// Scale with YUV conversion to ARGB and clipping.
// TODO(fbarchard): Add fourcc support. All 4 ARGB formats are easy to support.
LIBYUV_API
int YUVToARGBScaleReference2(const uint8_t* src_y,
                             int src_stride_y,
@@ -317,12 +319,12 @@ int YUVToARGBScaleReference2(const uint8_t* src_y,
                             int src_stride_u,
                             const uint8_t* src_v,
                             int src_stride_v,
                             uint32 /* src_fourcc */,
                             int src_width,
                             int src_height,
                             uint8_t* dst_argb,
                             int dst_stride_argb,
                             uint32 /* dst_fourcc */,
                             int dst_width,
                             int dst_height,
                             int clip_x,
@@ -330,7 +332,8 @@ int YUVToARGBScaleReference2(const uint8_t* src_y,
                             int clip_width,
                             int clip_height,
                             enum FilterMode filtering) {
  uint8_t* argb_buffer =
      static_cast<uint8_t*>(malloc(src_width * src_height * 4));
  int r;
  I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
             argb_buffer, src_width * 4, src_width, src_height);
@@ -342,7 +345,12 @@ int YUVToARGBScaleReference2(const uint8_t* src_y,
  return r;
}

static void FillRamp(uint8_t* buf,
                     int width,
                     int height,
                     int v,
                     int dx,
                     int dy) {
  int rv = v;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {


@@ -69,10 +69,10 @@ static inline bool SizeValid(int src_width,
  return true;
}

#define align_buffer_page_end(var, size)                                      \
  uint8_t* var##_mem =                                                        \
      reinterpret_cast<uint8_t*>(malloc(((size) + 4095 + 63) & ~4095));       \
  uint8_t* var = reinterpret_cast<uint8_t*>(                                  \
      (intptr_t)(var##_mem + (((size) + 4095 + 63) & ~4095) - (size)) & ~63)
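The arithmetic places the usable buffer at the tail of a whole number of 4 KiB pages, so reads past the end tend to run off the allocation. Worked through for a hypothetical size of 100 (page size assumed 4096):

// alloc = (100 + 4095 + 63) & ~4095 = 4096 bytes (one page)
// var   = (var_mem + 4096 - 100) & ~63, rounded down to a 64-byte boundary,
//         leaving var[0..99] within 63 bytes of the allocation's end,
// so an overread beyond that small slack leaves the malloc'd block, where a
// page fault or a tool such as ASan can flag it.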
#define free_aligned_buffer_page_end(var) \


@@ -39,8 +39,8 @@ typedef unsigned long long uint64_t;  // NOLINT
    !defined(__aarch64__)
#define HAS_SUMSQUAREERROR_NEON
static uint32_t SumSquareError_NEON(const uint8_t* src_a,
                                    const uint8_t* src_b,
                                    int count) {
  volatile uint32_t sse;
  asm volatile(
      "vmov.u8    q7, #0                         \n"
@@ -74,8 +74,8 @@ static uint32_t SumSquareError_NEON(const uint8_t* src_a,
#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#define HAS_SUMSQUAREERROR_NEON
static uint32_t SumSquareError_NEON(const uint8_t* src_a,
                                    const uint8_t* src_b,
                                    int count) {
  volatile uint32_t sse;
  asm volatile(
      "eor        v16.16b, v16.16b, v16.16b      \n"
@@ -108,8 +108,8 @@ static uint32_t SumSquareError_NEON(const uint8_t* src_a,
#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_SUMSQUAREERROR_SSE2
__declspec(naked) static uint32_t SumSquareError_SSE2(const uint8_t* /*src_a*/,
                                                      const uint8_t* /*src_b*/,
                                                      int /*count*/) {
  __asm {
    mov        eax, [esp + 4]  // src_a
    mov        edx, [esp + 8]  // src_b
@@ -147,8 +147,8 @@ __declspec(naked) static uint32_t SumSquareError_SSE2(const uint8_t* /*src_a*/,
#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
static uint32_t SumSquareError_SSE2(const uint8_t* src_a,
                                    const uint8_t* src_b,
                                    int count) {
  uint32_t sse;
  asm volatile(  // NOLINT
      "pxor      %%xmm0,%%xmm0                   \n"
@@ -229,8 +229,8 @@ static int CpuHasSSE2() {
#endif  // HAS_SUMSQUAREERROR_SSE2

static uint32_t SumSquareError_C(const uint8_t* src_a,
                                 const uint8_t* src_b,
                                 int count) {
  uint32_t sse = 0u;
  for (int x = 0; x < count; ++x) {
    int diff = src_a[x] - src_b[x];
@@ -242,8 +242,8 @@ static uint32_t SumSquareError_C(const uint8_t* src_a,
double ComputeSumSquareError(const uint8_t* src_a,
                             const uint8_t* src_b,
                             int count) {
  uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
                             int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
  SumSquareError = SumSquareError_NEON;
#endif
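The sum of squared errors computed here is typically converted to PSNR downstream: MSE = SSE / count and PSNR = 10 * log10(255^2 / MSE). A hedged helper sketch (name hypothetical; 128 used as a conventional cap for identical inputs):

#include <math.h>

static double PsnrFromSse(double sse, double count) {
  if (sse <= 0.0) {
    return 128.0;  // perfect match; avoid log of zero
  }
  return 10.0 * log10(255.0 * 255.0 * count / sse);
}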


@@ -262,7 +262,7 @@ double GetSSIMFullKernel(const uint8_t* org,
#define ADD_AND_STORE_FOUR_EPI32(M, OUT)                    \
  do {                                                      \
    uint32_t tmp[4];                                        \
    _mm_storeu_si128(reinterpret_cast<__m128i*>(tmp), (M)); \
    (OUT) = tmp[3] + tmp[2] + tmp[1] + tmp[0];              \
  } while (0)
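ADD_AND_STORE_FOUR_EPI32 is a horizontal add: it spills the four 32-bit lanes of an SSE2 register to memory and sums them. A hedged usage sketch (helper name hypothetical):

#include <emmintrin.h>
#include <stdint.h>

static uint32_t Sum4Lanes(__m128i m) {  // e.g. lanes {1, 2, 3, 4} -> 10
  uint32_t out;
  ADD_AND_STORE_FOUR_EPI32(m, out);
  return out;
}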