diff --git a/README.chromium b/README.chromium index f49a7a11b..a3c27bf88 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1646 +Version: 1647 License: BSD License File: LICENSE diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 285ec33b5..58e53ac60 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1646 +#define LIBYUV_VERSION 1647 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/source/planar_functions.cc b/source/planar_functions.cc index b8a53e856..3e0600b25 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -1541,7 +1541,7 @@ void SetPlane(uint8* dst_y, int height, uint32 value) { int y; - void (*SetRow)(uint8 * dst, uint8 value, int width) = SetRow_C; + void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C; if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; @@ -1624,7 +1624,7 @@ int ARGBRect(uint8* dst_argb, int height, uint32 value) { int y; - void (*ARGBSetRow)(uint8 * dst_argb, uint32 value, int width) = ARGBSetRow_C; + void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { return -1; } @@ -1902,7 +1902,7 @@ int ARGBSepia(uint8* dst_argb, int width, int height) { int y; - void (*ARGBSepiaRow)(uint8 * dst_argb, int width) = ARGBSepiaRow_C; + void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C; uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; @@ -2030,7 +2030,7 @@ int ARGBColorTable(uint8* dst_argb, int width, int height) { int y; - void (*ARGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb, + void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, int width) = ARGBColorTableRow_C; uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || @@ -2066,7 +2066,7 @@ int RGBColorTable(uint8* dst_argb, int width, int height) { int y; - void (*RGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb, + void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, int width) = RGBColorTableRow_C; uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || @@ -2111,7 +2111,7 @@ int ARGBQuantize(uint8* dst_argb, int width, int height) { int y; - void (*ARGBQuantizeRow)(uint8 * dst_argb, int scale, int interval_size, + void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size, int interval_offset, int width) = ARGBQuantizeRow_C; uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || @@ -2348,7 +2348,7 @@ int InterpolatePlane(const uint8* src0, int height, int interpolation) { int y; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { @@ -3124,7 +3124,7 @@ int YUY2ToNV12(const uint8* src_yuy2, int halfwidth = (width + 1) >> 1; void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { @@ -3232,7 +3232,7 @@ int UYVYToNV12(const uint8* src_uyvy, int halfwidth = (width + 1) >> 1; void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) = SplitUVRow_C; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) { diff --git a/source/row_any.cc b/source/row_any.cc index 74a6621fe..af5d1fbce 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -19,6 +19,14 @@ namespace libyuv { extern "C" { #endif +// memset for temp is meant to clear the source buffer (not dest) so that +// SIMD that reads full multiple of 16 bytes will not trigger msan errors. +// memset is not needed for production, as the garbage values are processed but +// not used, although there may be edge cases for subsampling. +// The size of the buffer is based on the largest read, which can be inferred +// by the source type (e.g. ARGB) and the mask (last parameter), or by examining +// the source code for how much the source pointers are advanced. + // Subsampled source needs to be increase by 1 of not even. #define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift)) @@ -618,17 +626,17 @@ ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) // Any 1 to 1 blended. Destination is read, modify, write. #define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8 temp[128 * 2]); \ - memset(temp, 0, 128 * 2); /* for YUY2 and msan */ \ + SIMD_ALIGNED(uint8 temp[64 * 2]); \ + memset(temp, 0, 64 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ - memcpy(temp + 128, dst_ptr + n * BPP, r * BPP); \ - ANY_SIMD(temp, temp + 128, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ + memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \ + ANY_SIMD(temp, temp + 64, MASK + 1); \ + memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ } #ifdef HAS_ARGBCOPYALPHAROW_AVX2 @@ -713,31 +721,31 @@ ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8*, 4, 4, 7) #define ANY11P16(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint16* src_ptr, uint16* dst_ptr, T shuffler, \ int width) { \ - SIMD_ALIGNED(uint16 temp[32 * 2]); \ - memset(temp, 0, 64); /* for msan */ \ + SIMD_ALIGNED(uint16 temp[16 * 2]); \ + memset(temp, 0, 32); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, shuffler, n); \ } \ - memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ - ANY_SIMD(temp, temp + 64, shuffler, MASK + 1); \ - memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ + memcpy(temp, src_ptr + n, r * SBPP); \ + ANY_SIMD(temp, temp + 16, shuffler, MASK + 1); \ + memcpy(dst_ptr + n, temp + 16, r * BPP); \ } #ifdef HAS_HALFFLOATROW_SSE2 -ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, float, 1, 1, 7) +ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, float, 2, 2, 7) #endif #ifdef HAS_HALFFLOATROW_AVX2 -ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 1, 1, 15) +ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 2, 2, 15) #endif #ifdef HAS_HALFFLOATROW_F16C -ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15) -ANY11P16(HalfFloat1Row_Any_F16C, HalfFloat1Row_F16C, float, 1, 1, 15) +ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 2, 2, 15) +ANY11P16(HalfFloat1Row_Any_F16C, HalfFloat1Row_F16C, float, 2, 2, 15) #endif #ifdef HAS_HALFFLOATROW_NEON -ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 1, 1, 7) -ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, float, 1, 1, 7) +ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 2, 2, 7) +ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, float, 2, 2, 7) #endif #undef ANY11P16 diff --git a/source/scale.cc b/source/scale.cc index a5c7f7ada..d2bd2532d 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -1014,10 +1014,10 @@ void ScalePlaneBilinearDown(int src_width, const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, + void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1121,10 +1121,10 @@ void ScalePlaneBilinearDown_16(int src_width, const int max_y = (src_height - 1) << 16; int j; - void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) = + void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, int dst_width, + int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; - void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr, + void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1217,10 +1217,10 @@ void ScalePlaneBilinearUp(int src_width, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr, + void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleFilterCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, + void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) = filtering ? ScaleFilterCols_C : ScaleCols_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1352,11 +1352,11 @@ void ScalePlaneBilinearUp_16(int src_width, int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint16 * dst_ptr, const uint16* src_ptr, + void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; - void (*ScaleFilterCols)(uint16 * dst_ptr, const uint16* src_ptr, - int dst_width, int x, int dx) = + void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, int dst_width, + int x, int dx) = filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); @@ -1485,7 +1485,7 @@ static void ScalePlaneSimple(int src_width, const uint8* src_ptr, uint8* dst_ptr) { int i; - void (*ScaleCols)(uint8 * dst_ptr, const uint8* src_ptr, int dst_width, int x, + void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, int dx) = ScaleCols_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; @@ -1521,7 +1521,7 @@ static void ScalePlaneSimple_16(int src_width, const uint16* src_ptr, uint16* dst_ptr) { int i; - void (*ScaleCols)(uint16 * dst_ptr, const uint16* src_ptr, int dst_width, + void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr, int dst_width, int x, int dx) = ScaleCols_16_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 1ea28f0d1..35178e953 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -263,10 +263,10 @@ static void ScaleARGBBilinearDown(int src_width, int dy, enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; int64 xlast = x + (int64)(dst_width - 1) * dx; @@ -380,10 +380,10 @@ static void ScaleARGBBilinearUp(int src_width, int dy, enum FilterMode filtering) { int j; - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; const int max_y = (src_height - 1) << 16; @@ -581,7 +581,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } #endif - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; #if defined(HAS_INTERPOLATEROW_SSSE3) @@ -623,7 +623,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, } #endif - void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb, + void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; if (src_width >= 32768) { @@ -762,7 +762,7 @@ static void ScaleARGBSimple(int src_width, int y, int dy) { int j; - void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width, + void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; (void)src_height; diff --git a/source/scale_common.cc b/source/scale_common.cc index 1bef39df8..a39ee05ad 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -1029,7 +1029,7 @@ void ScalePlaneVertical(int src_height, enum FilterMode filtering) { // TODO(fbarchard): Allow higher bpp. int dst_width_bytes = dst_width * bpp; - void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb, + void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; @@ -1109,7 +1109,7 @@ void ScalePlaneVertical_16(int src_height, enum FilterMode filtering) { // TODO(fbarchard): Allow higher wpp. int dst_width_words = dst_width * wpp; - void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb, + void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; diff --git a/util/psnr_main.cc b/util/psnr_main.cc index 4d930be4a..01e877746 100644 --- a/util/psnr_main.cc +++ b/util/psnr_main.cc @@ -356,19 +356,17 @@ int main(int argc, const char* argv[]) { const int uv_size = ((image_width + 1) / 2) * ((image_height + 1) / 2); const size_t total_size = y_size + 2 * uv_size; // NOLINT #if defined(_MSC_VER) - _fseeki64( - file_org, - static_cast<__int64>(num_skip_org) * static_cast<__int64>(total_size), - SEEK_SET); + _fseeki64(file_org, static_cast<__int64>(num_skip_org) * + static_cast<__int64>(total_size), + SEEK_SET); #else fseek(file_org, num_skip_org * total_size, SEEK_SET); #endif for (int cur_rec = 0; cur_rec < num_rec; ++cur_rec) { #if defined(_MSC_VER) - _fseeki64( - file_rec[cur_rec], - static_cast<__int64>(num_skip_rec) * static_cast<__int64>(total_size), - SEEK_SET); + _fseeki64(file_rec[cur_rec], static_cast<__int64>(num_skip_rec) * + static_cast<__int64>(total_size), + SEEK_SET); #else fseek(file_rec[cur_rec], num_skip_rec * total_size, SEEK_SET); #endif