From 11cbf8f976a41ccb279dc67489832ea9f12d56d7 Mon Sep 17 00:00:00 2001
From: Frank Barchard
Date: Thu, 14 Oct 2021 13:06:54 -0700
Subject: [PATCH] Add LIBYUV_BIT_EXACT macro to force C to match SIMD

- C code uses the ARM path, so NEON and C match.
- C is used on Intel platforms, disabling AVX.

Bug: libyuv:908, b/202888439
Change-Id: Ie035a150a60d3cf4ee7c849a96819d43640cf020
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3223507
Commit-Queue: Frank Barchard
Reviewed-by: richard winterton
---
 README.chromium                   |  2 +-
 include/libyuv/compare_row.h      |  2 +-
 include/libyuv/planar_functions.h |  2 +-
 include/libyuv/rotate_row.h       |  2 +-
 include/libyuv/row.h              |  2 +-
 include/libyuv/scale_row.h        |  2 +-
 include/libyuv/version.h          |  2 +-
 source/row_common.cc              | 10 ++---
 source/scale.cc                   | 38 +++++++++---------
 source/scale_argb.cc              | 36 ++++++++---------
 source/scale_uv.cc                | 58 +++++++++++++-------------
 unit_test/convert_test.cc         | 67 ++++++++++++++++++++++---------
 12 files changed, 126 insertions(+), 97 deletions(-)

diff --git a/README.chromium b/README.chromium
index 0062b186b..6ecadd7b2 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1796
+Version: 1798
 License: BSD
 License File: LICENSE
diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h
index 64115b3a3..7df7acc65 100644
--- a/include/libyuv/compare_row.h
+++ b/include/libyuv/compare_row.h
@@ -18,7 +18,7 @@ namespace libyuv {
 extern "C" {
 #endif
 
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index def773cb4..1efd651aa 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -23,7 +23,7 @@ extern "C" {
 #endif
 
 // TODO(fbarchard): Move cpu macros to row.h
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index f4c701fb4..a60f8eac6 100644
--- a/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -18,7 +18,7 @@ namespace libyuv {
 extern "C" {
 #endif
 
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 1444a0478..53ab0335a 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -20,7 +20,7 @@ namespace libyuv {
 extern "C" {
 #endif
 
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index 461ac36f3..fb668ff0c 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -19,7 +19,7 @@ namespace libyuv {
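// [Editor's note] Illustrative summary, not part of the patch: defining
// LIBYUV_BIT_EXACT at build time (for example with a -DLIBYUV_BIT_EXACT
// compiler flag, an assumed invocation) makes each of the headers above define
// LIBYUV_DISABLE_X86, so the SSE/AVX row functions are compiled out and the
// portable C rows, which follow the ARM/NEON arithmetic, are used instead:
//
//   #if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || ...
//   #define LIBYUV_DISABLE_X86  // C rows replace SSE/AVX rows
//   #endif
//
// The result is bit-identical output across x86, ARM and plain-C builds.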
extern "C" { #endif -#if defined(__pnacl__) || defined(__CLR_VER) || \ +#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \ (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 diff --git a/include/libyuv/version.h b/include/libyuv/version.h index db63a25cb..2775c27ac 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1796 +#define LIBYUV_VERSION 1798 #endif // INCLUDE_LIBYUV_VERSION_H_ \ No newline at end of file diff --git a/source/row_common.cc b/source/row_common.cc index 517b70562..a5ab81f26 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -28,13 +28,12 @@ extern "C" { // The following macro from row_win makes the C code match the row_win code, // which is 7 bit fixed point for ARGBToI420: -#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ +#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) #define LIBYUV_RGB7 1 #endif -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ - defined(_M_IX86) +#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) #define LIBYUV_ARGBTOUV_PAVGB 1 #define LIBYUV_RGBTOU_TRUNCATE 1 #endif @@ -522,6 +521,7 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { #define AVGB(a, b) (((a) + (b) + 1) >> 1) +// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round. #ifdef LIBYUV_RGBTOU_TRUNCATE static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { return (112 * b - 74 * g - 38 * r + 0x8000) >> 8; @@ -530,7 +530,7 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { return (112 * r - 94 * g - 18 * b + 0x8000) >> 8; } #else -// TODO(fbarchard): Add rounding to SIMD and use this +// TODO(fbarchard): Add rounding to x86 SIMD and use this static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; } @@ -539,6 +539,7 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { } #endif +// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb. 
#if !defined(LIBYUV_ARGBTOUV_PAVGB) static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) { return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8; @@ -551,7 +552,6 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) { // ARGBToY_C and ARGBToUV_C // Intel version mimic SSE/AVX which does 2 pavgb #if LIBYUV_ARGBTOUV_PAVGB - #define MAKEROWY(NAME, R, G, B, BPP) \ void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \ int x; \ diff --git a/source/scale.cc b/source/scale.cc index 03b0486f7..cda10e2b9 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -911,7 +911,7 @@ static void ScalePlaneBox(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint8_t* src = src_ptr + iy * src_stride; + const uint8_t* src = src_ptr + iy * (int64_t)src_stride; y += dy; if (y > max_y) { y = max_y; @@ -970,7 +970,7 @@ static void ScalePlaneBox_16(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint16_t* src = src_ptr + iy * src_stride; + const uint16_t* src = src_ptr + iy * (int64_t)src_stride; y += dy; if (y > max_y) { y = max_y; @@ -1087,7 +1087,7 @@ void ScalePlaneBilinearDown(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * (int64_t)src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1178,7 +1178,7 @@ void ScalePlaneBilinearDown_16(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint16_t* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * (int64_t)src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1290,7 +1290,7 @@ void ScalePlaneBilinearUp(int src_width, } { int yi = y >> 16; - const uint8_t* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. 
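// [Editor's note] Why the (int64_t) casts added throughout the scalers: the
// row index and the stride are plain int, so their product is evaluated in 32
// bits before the pointer addition.  With an assumed 65536-byte stride at row
// 40000 (values chosen only to illustrate):
//   40000 * 65536          = 2,621,440,000 > INT_MAX  (signed overflow, UB)
//   40000 * (int64_t)65536 = 2,621,440,000  evaluated in 64 bits, correct
// A minimal sketch of the pattern, with an illustrative helper name:
//   const uint8_t* RowPtr(const uint8_t* base, int y, int stride) {
//     return base + (int64_t)y * stride;  // promote before multiplying
//   }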
const int kRowSize = (dst_width + 31) & ~31; @@ -1313,7 +1313,7 @@ void ScalePlaneBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_ptr + yi * src_stride; + src = src_ptr + yi * (int64_t)src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1383,13 +1383,13 @@ void ScalePlaneUp2_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1496,13 +1496,13 @@ void ScalePlaneUp2_12_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1597,13 +1597,13 @@ void ScalePlaneUp2_16_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1743,7 +1743,7 @@ void ScalePlaneBilinearUp_16(int src_width, } { int yi = y >> 16; - const uint16_t* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; @@ -1766,7 +1766,7 @@ void ScalePlaneBilinearUp_16(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_ptr + yi * src_stride; + src = src_ptr + yi * (int64_t)src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1829,7 +1829,7 @@ static void ScalePlaneSimple(int src_width, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } @@ -1870,7 +1870,7 @@ static void ScalePlaneSimple_16(int src_width, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } @@ -1896,7 +1896,7 @@ void ScalePlane(const uint8_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } @@ -1990,7 +1990,7 @@ void ScalePlane_16(const uint16_t* src, // Negative height means invert the image. 
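// [Editor's note] Sketch of the negative-height convention used here and in
// UVCopy/UVCopy_16 later in the patch; the helper name is illustrative, not
// from the patch.  Pointing at the last row and negating the stride walks the
// image bottom-up, and the (int64_t) cast keeps (height - 1) * stride from
// overflowing 32 bits on large frames:
//   const uint8_t* FlipStart(const uint8_t* src, int stride, int height) {
//     return src + (height - 1) * (int64_t)stride;  // then use -stride
//   }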
if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } @@ -2084,7 +2084,7 @@ void ScalePlane_12(const uint16_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 451d4ec4d..073df1ae5 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; } else { - src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; + src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4; } #if defined(HAS_SCALEARGBROWDOWN2_SSE2) @@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; (void)src_width; (void)src_height; (void)dx; @@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - int row_stride = (dy >> 16) * src_stride; + int row_stride = (dy >> 16) * (int64_t)src_stride; void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, int src_step, uint8_t* dst_argb, int dst_width) = filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; @@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 @@ -372,7 +372,7 @@ static void ScaleARGBBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * (int64_t)src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -526,7 +526,7 @@ static void ScaleARGBBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * (int64_t)src_stride; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 4 + 31) & ~31; @@ -549,7 +549,7 @@ static void ScaleARGBBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_argb + yi * src_stride; + src = src_argb + yi * (int64_t)src_stride; } if (yi != lasty) { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -750,9 +750,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. 
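// [Editor's note] The scalers keep x, y, dx and dy in 16.16 fixed point, so
// "y >> 16" is the integer source row and "y & 0xffff" the sub-row fraction
// used for bilinear blending.  Rough sketch with assumed values; the real
// slopes come from ScaleSlope() and depend on the filter mode:
//   int dy = (src_height << 16) / dst_height;  // e.g. 2:1 downscale -> 0x20000
//   int row = y >> 16;                         // integer part: source row
//   int frac = y & 0xffff;                     // fraction: 0..65535 ~ 0.0..1.0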
int yi = y >> 16; int uv_yi = yi >> kYShift; - const uint8_t* src_row_y = src_y + yi * src_stride_y; - const uint8_t* src_row_u = src_u + uv_yi * src_stride_u; - const uint8_t* src_row_v = src_v + uv_yi * src_stride_v; + const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y; + const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u; + const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 4 + 31) & ~31; @@ -790,9 +790,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, y = max_y; yi = y >> 16; uv_yi = yi >> kYShift; - src_row_y = src_y + yi * src_stride_y; - src_row_u = src_u + uv_yi * src_stride_u; - src_row_v = src_v + uv_yi * src_stride_v; + src_row_y = src_y + yi * (int64_t)src_stride_y; + src_row_u = src_u + uv_yi * (int64_t)src_stride_u; + src_row_v = src_v + uv_yi * (int64_t)src_stride_v; } if (yi != lasty) { // TODO(fbarchard): Convert the clipped region of row. @@ -888,7 +888,7 @@ static void ScaleARGBSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride, dst_width, x, dx); dst_argb += dst_stride; y += dy; @@ -924,7 +924,7 @@ static void ScaleARGB(const uint8_t* src, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -939,7 +939,7 @@ static void ScaleARGB(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * src_stride; + src += (clipf >> 16) * (int64_t)src_stride; dst += clip_y * dst_stride; } @@ -973,7 +973,7 @@ static void ScaleARGB(const uint8_t* src, filtering = kFilterNone; if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, + ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); return; } diff --git a/source/scale_uv.cc b/source/scale_uv.cc index d9a314453..8a7f2abb3 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_uv += (y >> 16) * src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; } else { - src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2; + src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2; } #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) @@ -216,7 +216,7 @@ static void ScaleUVDown4Box(int src_width, uint8_t* dst_uv, int dst_width) = ScaleUVRowDown2Box_C; // Advance to odd row, even column. - src_uv += (y >> 16) * src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; (void)src_width; (void)src_height; (void)dx; @@ -279,7 +279,7 @@ static void ScaleUVDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - int row_stride = (dy >> 16) * src_stride; + int row_stride = (dy >> 16) * (int64_t)src_stride; void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride, int src_step, uint8_t* dst_uv, int dst_width) = filtering ? 
ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C; @@ -287,7 +287,7 @@ static void ScaleUVDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_uv += (y >> 16) * src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2; #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3 @@ -447,7 +447,7 @@ static void ScaleUVBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_uv + yi * src_stride; + const uint8_t* src = src_uv + yi * (int64_t)src_stride; if (filtering == kFilterLinear) { ScaleUVFilterCols(dst_uv, src, dst_width, x, dx); } else { @@ -602,7 +602,7 @@ static void ScaleUVBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_uv + yi * src_stride; + const uint8_t* src = src_uv + yi * (int64_t)src_stride; // Allocate 2 rows of UV. const int kRowSize = (dst_width * 2 + 15) & ~15; @@ -625,7 +625,7 @@ static void ScaleUVBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_uv + yi * src_stride; + src = src_uv + yi * (int64_t)src_stride; } if (yi != lasty) { ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -690,12 +690,12 @@ void ScaleUVLinearUp2(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -796,12 +796,12 @@ void ScaleUVLinearUp2_16(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -927,7 +927,7 @@ static void ScaleUVSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx); + ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x, dx); dst_uv += dst_stride; y += dy; } @@ -935,43 +935,43 @@ static void ScaleUVSimple(int src_width, // Copy UV with optional flipping #if HAS_UVCOPY -static int UVCopy(const uint8_t* src_UV, +static int UVCopy(const uint8_t* src_uv, int src_stride_uv, - uint8_t* dst_UV, + uint8_t* dst_uv, int dst_stride_uv, int width, int height) { - if (!src_UV || !dst_UV || width <= 0 || height == 0) { + if (!src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_UV = src_UV + (height - 1) * src_stride_uv; + src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv; src_stride_uv = -src_stride_uv; } - CopyPlane(src_UV, src_stride_uv, dst_UV, dst_stride_uv, width * 2, height); + CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, width * 2, height); return 0; } -static int UVCopy_16(const uint16_t* src_UV, +static int UVCopy_16(const uint16_t* src_uv, int src_stride_uv, - uint16_t* dst_UV, + uint16_t* dst_uv, int dst_stride_uv, int width, int height) { - if (!src_UV || !dst_UV || width <= 0 || height == 0) { + if (!src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_UV = src_UV + (height - 1) * src_stride_uv; + src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv; src_stride_uv = -src_stride_uv; } - CopyPlane_16(src_UV, src_stride_uv, dst_UV, dst_stride_uv, width * 2, height); + CopyPlane_16(src_uv, src_stride_uv, dst_uv, dst_stride_uv, width * 2, height); return 0; } #endif // HAS_UVCOPY @@ -1005,7 +1005,7 @@ static void ScaleUV(const uint8_t* src, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -1020,7 +1020,7 @@ static void ScaleUV(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * src_stride; + src += (clipf >> 16) * (int64_t)src_stride; dst += clip_y * dst_stride; } @@ -1061,7 +1061,7 @@ static void ScaleUV(const uint8_t* src, #ifdef HAS_UVCOPY if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - UVCopy(src + (y >> 16) * src_stride + (x >> 16) * 2, src_stride, dst, + UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2, src_stride, dst, dst_stride, clip_width, clip_height); return; } @@ -1155,7 +1155,7 @@ int UVScale_16(const uint16_t* src_uv, // Negative src_height means invert the image. 
   if (src_height < 0) {
     src_height = -src_height;
-    src_uv = src_uv + (src_height - 1) * src_stride_uv;
+    src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
   src_width = Abs(src_width);
@@ -1163,11 +1163,11 @@ int UVScale_16(const uint16_t* src_uv,
 
 #ifdef HAS_UVCOPY
   if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
     if (dst_height == 1) {
-      UVCopy_16(src_uv + ((src_height - 1) / 2) * src_stride_uv, src_stride_uv,
+      UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv, src_stride_uv,
                 dst_uv, dst_stride_uv, dst_width, dst_height);
     } else {
       dy = src_height / dst_height;
-      UVCopy_16(src_uv + src_stride_uv * ((dy - 1) / 2), src_stride_uv * dy,
+      UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv, dy * (int64_t)src_stride_uv,
                 dst_uv, dst_stride_uv, dst_width, dst_height);
     }
 
diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc
index 8f9d57985..9cb766632 100644
--- a/unit_test/convert_test.cc
+++ b/unit_test/convert_test.cc
@@ -1591,20 +1591,6 @@ TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
 TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
 TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
 
-TEST_F(LibYUVConvertTest, Test565) {
-  SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
-  SIMD_ALIGNED(uint8_t pixels565[256][2]);
-
-  for (int i = 0; i < 256; ++i) {
-    for (int j = 0; j < 4; ++j) {
-      orig_pixels[i][j] = i;
-    }
-  }
-  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
-  uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
-  EXPECT_EQ(610919429u, checksum);
-}
-
 #ifdef HAVE_JPEG
 TEST_F(LibYUVConvertTest, ValidateJpeg) {
   const int kOff = 10;
@@ -3831,10 +3817,11 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) {
       ++histogram_b[b];
       ++histogram_g[g];
       ++histogram_r[r];
-      int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f));
-      EXPECT_NEAR(b, expected_y, 1);
-      EXPECT_NEAR(g, expected_y, 1);
-      EXPECT_NEAR(r, expected_y, 1);
+      // Reference formula for Y channel contribution in YUV to RGB conversions:
+      int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f));
+      EXPECT_EQ(b, expected_y);
+      EXPECT_EQ(g, expected_y);
+      EXPECT_EQ(r, expected_y);
       EXPECT_EQ(a, 255);
     }
 
@@ -3956,7 +3943,7 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
       ++histogram_b[b10];
       ++histogram_g[g10];
       ++histogram_r[r10];
-      int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
+      int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5));
       EXPECT_NEAR(b10, expected_y, 4);
       EXPECT_NEAR(g10, expected_y, 4);
       EXPECT_NEAR(r10, expected_y, 4);
@@ -4133,6 +4120,48 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
   free_aligned_buffer_page_end(dest_rgb24);
 }
 
+TEST_F(LibYUVConvertTest, Test565) {
+  SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
+  SIMD_ALIGNED(uint8_t pixels565[256][2]);
+
+  for (int i = 0; i < 256; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      orig_pixels[i][j] = i;
+    }
+  }
+  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
+  uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+  EXPECT_EQ(610919429u, checksum);
+}
+
+// Test RGB24 to J420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
+  const int kSize = 256;
+  align_buffer_page_end(orig_rgb24, kSize * 3 * 2);  // 2 rows of RGB24
+  align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
+  int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+                      (kSize * 2) * benchmark_iterations_;
+
+  for (int i = 0; i < kSize * 3 * 2; ++i) {
+    orig_rgb24[i] = i;
+  }
+
+  for (int i = 0; i < iterations256; ++i) {
+    RGB24ToJ420(orig_rgb24, kSize * 3,
+                dest_j420, kSize,                      // Y plane
+                dest_j420 + kSize * 2, kSize / 2,      // U plane
+                dest_j420 + kSize * 5 / 2, kSize / 2,  // V plane
+                kSize, 2);
+  }
+
+  uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
+  EXPECT_EQ(2755440272u, checksum);
+
+  free_aligned_buffer_page_end(orig_rgb24);
+  free_aligned_buffer_page_end(dest_j420);
+}
+#endif
+
 // Test I400 with jpeg matrix is same as J400
 TEST_F(LibYUVConvertTest, TestI400) {
   const int kSize = 256;
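// [Editor's note] Worked example, values computed by hand, of why the
// TestH420ToARGB expectations above could tighten from EXPECT_NEAR(..., 1) to
// EXPECT_EQ once rounding was added to the reference formula: for i = 100,
//   truncating: (100 - 16) * 1.164f        = 97.776 -> 97
//   rounding:   (100 - 16) * 1.164f + 0.5f = 98.276 -> 98
// With LIBYUV_BIT_EXACT the library produces the same output on every
// platform, so the test can require exact equality rather than a +/-1
// tolerance.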