Add LIBYUV_BIT_EXACT macro to force C to match SIMD

- C code uses the ARM path, so NEON and C match.
- C is used on Intel platforms, disabling AVX (see the usage sketch below).
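
A minimal usage sketch, under two stated assumptions: the flag must be visible while libyuv itself is compiled, so it is normally passed to the whole build (e.g. -DLIBYUV_BIT_EXACT), and libyuv/row.h carries the same guard as the headers changed below. The compile-time check is illustrative, not from this commit:

#define LIBYUV_BIT_EXACT  // stand-in for -DLIBYUV_BIT_EXACT on the build line
#include "libyuv/row.h"
#ifndef LIBYUV_DISABLE_X86
#error "a bit-exact build should have disabled the x86 SIMD paths"
#endif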

Bug: libyuv:908, b/202888439
Change-Id: Ie035a150a60d3cf4ee7c849a96819d43640cf020
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3223507
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Frank Barchard 2021-10-14 13:06:54 -07:00 committed by libyuv LUCI CQ
parent daf9778a24
commit 11cbf8f976
12 changed files with 126 additions and 97 deletions

View File

@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1796
+Version: 1798
 License: BSD
 License File: LICENSE

View File

@@ -18,7 +18,7 @@ namespace libyuv {
 extern "C" {
 #endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86

View File

@@ -23,7 +23,7 @@ extern "C" {
 #endif
 // TODO(fbarchard): Move cpu macros to row.h
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86

View File

@@ -18,7 +18,7 @@ namespace libyuv {
 extern "C" {
 #endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86

View File

@@ -20,7 +20,7 @@ namespace libyuv {
 extern "C" {
 #endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86

View File

@@ -19,7 +19,7 @@ namespace libyuv {
 extern "C" {
 #endif
-#if defined(__pnacl__) || defined(__CLR_VER) || \
+#if defined(LIBYUV_BIT_EXACT) || defined(__pnacl__) || defined(__CLR_VER) || \
     (defined(__native_client__) && defined(__x86_64__)) || \
     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
 #define LIBYUV_DISABLE_X86

View File

@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_
 #define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1796
+#define LIBYUV_VERSION 1798
 #endif  // INCLUDE_LIBYUV_VERSION_H_

View File

@@ -28,13 +28,12 @@ extern "C" {
 // The following macro from row_win makes the C code match the row_win code,
 // which is 7 bit fixed point for ARGBToI420:
-#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
+#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
     !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
 #define LIBYUV_RGB7 1
 #endif
-#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
-    defined(_M_IX86)
+#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86))
 #define LIBYUV_ARGBTOUV_PAVGB 1
 #define LIBYUV_RGBTOU_TRUNCATE 1
 #endif
@@ -522,6 +521,7 @@ static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
+// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
 #ifdef LIBYUV_RGBTOU_TRUNCATE
 static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
   return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
@@ -530,7 +530,7 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
   return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
 }
 #else
-// TODO(fbarchard): Add rounding to SIMD and use this
+// TODO(fbarchard): Add rounding to x86 SIMD and use this
 static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
   return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
 }
@@ -539,6 +539,7 @@ static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
 }
 #endif
+// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
 #if !defined(LIBYUV_ARGBTOUV_PAVGB)
 static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
   return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
@@ -551,7 +552,6 @@ static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
-
 // ARGBToY_C and ARGBToUV_C
 // Intel version mimic SSE/AVX which does 2 pavgb
 #if LIBYUV_ARGBTOUV_PAVGB
 #define MAKEROWY(NAME, R, G, B, BPP) \
   void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
     int x; \
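
For context on the two comments added above, a standalone sketch (helper names are ad hoc; the constants and AVGB are copied from the hunk) of why the truncating x86 formula and the rounding C formula can land one apart, and how two pavgb-style averages subsample a 2x2 block:

#include <stdint.h>
#include <stdio.h>

#define AVGB(a, b) (((a) + (b) + 1) >> 1)  // pavgb: average with rounding up

// x86 style: bias 0x8000 truncates the fractional byte.
static int RGBToU_truncate(int r, int g, int b) {
  return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
}
// Default C style: bias 0x8080 adds one half before the shift, i.e. rounds.
static int RGBToU_round(int r, int g, int b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}

int main(void) {
  int diffs = 0, total = 0;
  for (int r = 0; r < 256; r += 5)
    for (int g = 0; g < 256; g += 5)
      for (int b = 0; b < 256; b += 5) {
        diffs += RGBToU_round(r, g, b) != RGBToU_truncate(r, g, b);
        ++total;
      }
  // Roughly half of all inputs land one apart; LIBYUV_BIT_EXACT avoids the
  // mismatch by keeping every platform on a single formula.
  printf("%d of %d samples differ by 1\n", diffs, total);
  printf("pavgb-style 2x2 average of 1,2,3,4: %d\n",
         AVGB(AVGB(1, 2), AVGB(3, 4)));  // prints 3
  return 0;
}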

View File

@@ -911,7 +911,7 @@ static void ScalePlaneBox(int src_width,
   for (j = 0; j < dst_height; ++j) {
     int boxheight;
     int iy = y >> 16;
-    const uint8_t* src = src_ptr + iy * src_stride;
+    const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
     y += dy;
     if (y > max_y) {
       y = max_y;
@@ -970,7 +970,7 @@ static void ScalePlaneBox_16(int src_width,
   for (j = 0; j < dst_height; ++j) {
     int boxheight;
     int iy = y >> 16;
-    const uint16_t* src = src_ptr + iy * src_stride;
+    const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
     y += dy;
     if (y > max_y) {
       y = max_y;
@@ -1087,7 +1087,7 @@ void ScalePlaneBilinearDown(int src_width,
   for (j = 0; j < dst_height; ++j) {
     int yi = y >> 16;
-    const uint8_t* src = src_ptr + yi * src_stride;
+    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
     if (filtering == kFilterLinear) {
       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
     } else {
@@ -1178,7 +1178,7 @@ void ScalePlaneBilinearDown_16(int src_width,
   for (j = 0; j < dst_height; ++j) {
     int yi = y >> 16;
-    const uint16_t* src = src_ptr + yi * src_stride;
+    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
     if (filtering == kFilterLinear) {
       ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
     } else {
@@ -1290,7 +1290,7 @@ void ScalePlaneBilinearUp(int src_width,
   }
   {
     int yi = y >> 16;
-    const uint8_t* src = src_ptr + yi * src_stride;
+    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
     // Allocate 2 row buffers.
     const int kRowSize = (dst_width + 31) & ~31;
@@ -1313,7 +1313,7 @@ void ScalePlaneBilinearUp(int src_width,
       if (y > max_y) {
        y = max_y;
        yi = y >> 16;
-       src = src_ptr + yi * src_stride;
+       src = src_ptr + yi * (int64_t)src_stride;
      }
      if (yi != lasty) {
        ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1383,13 +1383,13 @@ void ScalePlaneUp2_Linear(int src_width,
 #endif
   if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
       dst_ptr += dst_stride;
       y += dy;
     }
@@ -1496,13 +1496,13 @@ void ScalePlaneUp2_12_Linear(int src_width,
 #endif
   if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
       dst_ptr += dst_stride;
       y += dy;
     }
@@ -1597,13 +1597,13 @@ void ScalePlaneUp2_16_Linear(int src_width,
 #endif
   if (dst_height == 1) {
-    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
+    ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
                dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
+      ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
       dst_ptr += dst_stride;
       y += dy;
     }
@@ -1743,7 +1743,7 @@ void ScalePlaneBilinearUp_16(int src_width,
   }
   {
     int yi = y >> 16;
-    const uint16_t* src = src_ptr + yi * src_stride;
+    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
     // Allocate 2 row buffers.
     const int kRowSize = (dst_width + 31) & ~31;
@@ -1766,7 +1766,7 @@ void ScalePlaneBilinearUp_16(int src_width,
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
-       src = src_ptr + yi * src_stride;
+       src = src_ptr + yi * (int64_t)src_stride;
      }
      if (yi != lasty) {
        ScaleFilterCols(rowptr, src, dst_width, x, dx);
@@ -1829,7 +1829,7 @@ static void ScalePlaneSimple(int src_width,
   }
   for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, dx);
     dst_ptr += dst_stride;
     y += dy;
   }
@@ -1870,7 +1870,7 @@ static void ScalePlaneSimple_16(int src_width,
   }
   for (i = 0; i < dst_height; ++i) {
-    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
+    ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, dx);
     dst_ptr += dst_stride;
     y += dy;
   }
@@ -1896,7 +1896,7 @@ void ScalePlane(const uint8_t* src,
   // Negative height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
+    src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
@@ -1990,7 +1990,7 @@ void ScalePlane_16(const uint16_t* src,
   // Negative height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
+    src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
@@ -2084,7 +2084,7 @@ void ScalePlane_12(const uint16_t* src,
   // Negative height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
+    src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
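
The (int64_t) casts that run through this file and the two scale files below are overflow fixes rather than behavior changes: row * stride was an int-by-int multiply, so for large surfaces the byte offset can exceed INT_MAX and wrap before it ever reaches the pointer arithmetic. A sketch of the failure mode (sizes are illustrative; the unsigned detour just makes the wraparound printable, since signed int overflow is undefined in C):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  int iy = 40000;          // row index in a very tall image
  int src_stride = 65536;  // bytes per row
  // 40000 * 65536 = 2621440000 > INT32_MAX, so a 32-bit product wraps.
  int32_t wrapped = (int32_t)((uint32_t)iy * (uint32_t)src_stride);
  int64_t widened = iy * (int64_t)src_stride;  // the pattern used in the diff
  printf("wrapped=%d widened=%lld\n", (int)wrapped, (long long)widened);
  return 0;
}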

View File

@@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
   // Advance to odd row, even column.
   if (filtering == kFilterBilinear) {
-    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+    src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
   } else {
-    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
+    src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
   }
 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width,
                               uint8_t* dst_argb, int dst_width) =
       ScaleARGBRowDown2Box_C;
   // Advance to odd row, even column.
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
   (void)src_width;
   (void)src_height;
   (void)dx;
@@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
                               enum FilterMode filtering) {
   int j;
   int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * src_stride;
+  int row_stride = (dy >> 16) * (int64_t)src_stride;
   void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
                                int src_step, uint8_t* dst_argb, int dst_width) =
       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
   (void)src_height;
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
-  src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
+  src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
   if (TestCpuFlag(kCpuHasSSE2)) {
     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@@ -372,7 +372,7 @@ static void ScaleARGBBilinearDown(int src_width,
   }
   for (j = 0; j < dst_height; ++j) {
     int yi = y >> 16;
-    const uint8_t* src = src_argb + yi * src_stride;
+    const uint8_t* src = src_argb + yi * (int64_t)src_stride;
     if (filtering == kFilterLinear) {
       ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
     } else {
@@ -526,7 +526,7 @@ static void ScaleARGBBilinearUp(int src_width,
   {
     int yi = y >> 16;
-    const uint8_t* src = src_argb + yi * src_stride;
+    const uint8_t* src = src_argb + yi * (int64_t)src_stride;
     // Allocate 2 rows of ARGB.
     const int kRowSize = (dst_width * 4 + 31) & ~31;
@@ -549,7 +549,7 @@ static void ScaleARGBBilinearUp(int src_width,
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
-       src = src_argb + yi * src_stride;
+       src = src_argb + yi * (int64_t)src_stride;
      }
      if (yi != lasty) {
        ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@@ -750,9 +750,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
   const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
   int yi = y >> 16;
   int uv_yi = yi >> kYShift;
-  const uint8_t* src_row_y = src_y + yi * src_stride_y;
-  const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
-  const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
+  const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
+  const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
+  const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
   // Allocate 2 rows of ARGB.
   const int kRowSize = (dst_width * 4 + 31) & ~31;
@@ -790,9 +790,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
-       src_row_y = src_y + yi * src_stride_y;
-       src_row_u = src_u + uv_yi * src_stride_u;
-       src_row_v = src_v + uv_yi * src_stride_v;
+       src_row_y = src_y + yi * (int64_t)src_stride_y;
+       src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
+       src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
@@ -888,7 +888,7 @@ static void ScaleARGBSimple(int src_width,
   }
   for (j = 0; j < dst_height; ++j) {
-    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
+    ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride, dst_width, x,
                   dx);
     dst_argb += dst_stride;
     y += dy;
@@ -924,7 +924,7 @@ static void ScaleARGB(const uint8_t* src,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
+    src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -939,7 +939,7 @@ static void ScaleARGB(const uint8_t* src,
   if (clip_y) {
     int64_t clipf = (int64_t)(clip_y)*dy;
     y += (clipf & 0xffff);
-    src += (clipf >> 16) * src_stride;
+    src += (clipf >> 16) * (int64_t)src_stride;
     dst += clip_y * dst_stride;
   }
@@ -973,7 +973,7 @@ static void ScaleARGB(const uint8_t* src,
       filtering = kFilterNone;
       if (dx == 0x10000 && dy == 0x10000) {
         // Straight copy.
-        ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
+        ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4, src_stride,
                  dst, dst_stride, clip_width, clip_height);
         return;
       }

View File

@@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
   // Advance to odd row, even column.
   if (filtering == kFilterBilinear) {
-    src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
+    src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
   } else {
-    src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
+    src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
   }
 #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@@ -216,7 +216,7 @@ static void ScaleUVDown4Box(int src_width,
                             uint8_t* dst_uv, int dst_width) =
       ScaleUVRowDown2Box_C;
   // Advance to odd row, even column.
-  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
   (void)src_width;
   (void)src_height;
   (void)dx;
@@ -279,7 +279,7 @@ static void ScaleUVDownEven(int src_width,
                             enum FilterMode filtering) {
   int j;
   int col_step = dx >> 16;
-  int row_stride = (dy >> 16) * src_stride;
+  int row_stride = (dy >> 16) * (int64_t)src_stride;
   void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                              int src_step, uint8_t* dst_uv, int dst_width) =
       filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@@ -287,7 +287,7 @@ static void ScaleUVDownEven(int src_width,
   (void)src_height;
   assert(IS_ALIGNED(src_width, 2));
   assert(IS_ALIGNED(src_height, 2));
-  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
+  src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
 #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
   if (TestCpuFlag(kCpuHasSSSE3)) {
     ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@@ -447,7 +447,7 @@ static void ScaleUVBilinearDown(int src_width,
   }
   for (j = 0; j < dst_height; ++j) {
     int yi = y >> 16;
-    const uint8_t* src = src_uv + yi * src_stride;
+    const uint8_t* src = src_uv + yi * (int64_t)src_stride;
     if (filtering == kFilterLinear) {
       ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
     } else {
@@ -602,7 +602,7 @@ static void ScaleUVBilinearUp(int src_width,
   {
     int yi = y >> 16;
-    const uint8_t* src = src_uv + yi * src_stride;
+    const uint8_t* src = src_uv + yi * (int64_t)src_stride;
     // Allocate 2 rows of UV.
     const int kRowSize = (dst_width * 2 + 15) & ~15;
@@ -625,7 +625,7 @@ static void ScaleUVBilinearUp(int src_width,
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
-       src = src_uv + yi * src_stride;
+       src = src_uv + yi * (int64_t)src_stride;
      }
      if (yi != lasty) {
        ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@@ -690,12 +690,12 @@ void ScaleUVLinearUp2(int src_width,
 #endif
   if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
       dst_uv += dst_stride;
       y += dy;
     }
@@ -796,12 +796,12 @@ void ScaleUVLinearUp2_16(int src_width,
 #endif
   if (dst_height == 1) {
-    ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
+    ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv, dst_width);
   } else {
     dy = FixedDiv(src_height - 1, dst_height - 1);
     y = (1 << 15) - 1;
     for (i = 0; i < dst_height; ++i) {
-      ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
+      ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
       dst_uv += dst_stride;
       y += dy;
     }
@@ -927,7 +927,7 @@ static void ScaleUVSimple(int src_width,
   }
   for (j = 0; j < dst_height; ++j) {
-    ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
+    ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x, dx);
     dst_uv += dst_stride;
     y += dy;
   }
@@ -935,43 +935,43 @@ static void ScaleUVSimple(int src_width,
 // Copy UV with optional flipping
 #if HAS_UVCOPY
-static int UVCopy(const uint8_t* src_UV,
+static int UVCopy(const uint8_t* src_uv,
                   int src_stride_uv,
-                  uint8_t* dst_UV,
+                  uint8_t* dst_uv,
                   int dst_stride_uv,
                   int width,
                   int height) {
-  if (!src_UV || !dst_UV || width <= 0 || height == 0) {
+  if (!src_uv || !dst_uv || width <= 0 || height == 0) {
     return -1;
   }
   // Negative height means invert the image.
   if (height < 0) {
     height = -height;
-    src_UV = src_UV + (height - 1) * src_stride_uv;
+    src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
-  CopyPlane(src_UV, src_stride_uv, dst_UV, dst_stride_uv, width * 2, height);
+  CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, width * 2, height);
   return 0;
 }
-static int UVCopy_16(const uint16_t* src_UV,
+static int UVCopy_16(const uint16_t* src_uv,
                      int src_stride_uv,
-                     uint16_t* dst_UV,
+                     uint16_t* dst_uv,
                      int dst_stride_uv,
                      int width,
                      int height) {
-  if (!src_UV || !dst_UV || width <= 0 || height == 0) {
+  if (!src_uv || !dst_uv || width <= 0 || height == 0) {
    return -1;
   }
   // Negative height means invert the image.
   if (height < 0) {
    height = -height;
-   src_UV = src_UV + (height - 1) * src_stride_uv;
+   src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
    src_stride_uv = -src_stride_uv;
   }
-  CopyPlane_16(src_UV, src_stride_uv, dst_UV, dst_stride_uv, width * 2, height);
+  CopyPlane_16(src_uv, src_stride_uv, dst_uv, dst_stride_uv, width * 2, height);
   return 0;
 }
 #endif  // HAS_UVCOPY
@@ -1005,7 +1005,7 @@ static void ScaleUV(const uint8_t* src,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src = src + (src_height - 1) * src_stride;
+    src = src + (src_height - 1) * (int64_t)src_stride;
     src_stride = -src_stride;
   }
   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@@ -1020,7 +1020,7 @@ static void ScaleUV(const uint8_t* src,
   if (clip_y) {
     int64_t clipf = (int64_t)(clip_y)*dy;
     y += (clipf & 0xffff);
-    src += (clipf >> 16) * src_stride;
+    src += (clipf >> 16) * (int64_t)src_stride;
     dst += clip_y * dst_stride;
   }
@@ -1061,7 +1061,7 @@ static void ScaleUV(const uint8_t* src,
 #ifdef HAS_UVCOPY
   if (dx == 0x10000 && dy == 0x10000) {
     // Straight copy.
-    UVCopy(src + (y >> 16) * src_stride + (x >> 16) * 2, src_stride, dst,
+    UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2, src_stride, dst,
            dst_stride, clip_width, clip_height);
     return;
   }
@@ -1155,7 +1155,7 @@ int UVScale_16(const uint16_t* src_uv,
   // Negative src_height means invert the image.
   if (src_height < 0) {
     src_height = -src_height;
-    src_uv = src_uv + (src_height - 1) * src_stride_uv;
+    src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
     src_stride_uv = -src_stride_uv;
   }
   src_width = Abs(src_width);
@@ -1163,11 +1163,11 @@ int UVScale_16(const uint16_t* src_uv,
 #ifdef HAS_UVCOPY
   if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
     if (dst_height == 1) {
-      UVCopy_16(src_uv + ((src_height - 1) / 2) * src_stride_uv, src_stride_uv,
+      UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv, src_stride_uv,
                 dst_uv, dst_stride_uv, dst_width, dst_height);
     } else {
       dy = src_height / dst_height;
-      UVCopy_16(src_uv + src_stride_uv * ((dy - 1) / 2), src_stride_uv * dy,
+      UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv, dy * (int64_t)src_stride_uv,
                 dst_uv, dst_stride_uv, dst_width, dst_height);
     }
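
The negative-height convention that recurs in these hunks is worth a standalone sketch (illustrative code, not from the library): pointing src at the last row and negating the stride walks the image bottom-up without changing any loop code.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint8_t img[4][3] = {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}};
  int height = -4;  // negative height requests a vertical flip
  int stride = 3;   // bytes per row
  const uint8_t* src = &img[0][0];
  if (height < 0) {
    height = -height;
    src = src + (height - 1) * (int64_t)stride;  // start at the last row
    stride = -stride;                            // and walk upward
  }
  for (int y = 0; y < height; ++y) {
    printf("%d", src[y * (int64_t)stride]);  // prints 4321
  }
  printf("\n");
  return 0;
}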

View File

@@ -1591,20 +1591,6 @@ TESTEND(BGRAToARGB, uint8_t, 4, 4, 1)
 TESTEND(ABGRToARGB, uint8_t, 4, 4, 1)
 TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
-TEST_F(LibYUVConvertTest, Test565) {
-  SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
-  SIMD_ALIGNED(uint8_t pixels565[256][2]);
-
-  for (int i = 0; i < 256; ++i) {
-    for (int j = 0; j < 4; ++j) {
-      orig_pixels[i][j] = i;
-    }
-  }
-  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
-  uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
-  EXPECT_EQ(610919429u, checksum);
-}
-
 #ifdef HAVE_JPEG
 TEST_F(LibYUVConvertTest, ValidateJpeg) {
   const int kOff = 10;
@@ -3831,10 +3817,11 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) {
       ++histogram_b[b];
       ++histogram_g[g];
       ++histogram_r[r];
-      int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f));
-      EXPECT_NEAR(b, expected_y, 1);
-      EXPECT_NEAR(g, expected_y, 1);
-      EXPECT_NEAR(r, expected_y, 1);
+      // Reference formula for Y channel contribution in YUV to RGB conversions:
+      int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f));
+      EXPECT_EQ(b, expected_y);
+      EXPECT_EQ(g, expected_y);
+      EXPECT_EQ(r, expected_y);
       EXPECT_EQ(a, 255);
     }
@@ -3956,7 +3943,7 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
       ++histogram_b[b10];
       ++histogram_g[g10];
       ++histogram_r[r10];
-      int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
+      int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5));
       EXPECT_NEAR(b10, expected_y, 4);
       EXPECT_NEAR(g10, expected_y, 4);
       EXPECT_NEAR(r10, expected_y, 4);
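
These test updates pair with the bit-exact C path: adding 0.5 before the cast makes expected_y round to nearest instead of truncating toward zero, which is what lets TestH420ToARGB tighten EXPECT_NEAR to EXPECT_EQ. A one-case illustration (i = 100 is arbitrary):

#include <stdio.h>
int main(void) {
  int i = 100;
  float y = (i - 16) * 1.164f;  // 97.776
  printf("truncated=%d rounded=%d\n", (int)y, (int)(y + 0.5f));  // 97 vs 98
  return 0;
}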
@@ -4133,6 +4120,48 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
   free_aligned_buffer_page_end(dest_rgb24);
 }
+TEST_F(LibYUVConvertTest, Test565) {
+  SIMD_ALIGNED(uint8_t orig_pixels[256][4]);
+  SIMD_ALIGNED(uint8_t pixels565[256][2]);
+
+  for (int i = 0; i < 256; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      orig_pixels[i][j] = i;
+    }
+  }
+  ARGBToRGB565(&orig_pixels[0][0], 0, &pixels565[0][0], 0, 256, 1);
+  uint32_t checksum = HashDjb2(&pixels565[0][0], sizeof(pixels565), 5381);
+  EXPECT_EQ(610919429u, checksum);
+}
+
+// Test RGB24 to J420 is exact
+#if defined(LIBYUV_BIT_EXACT)
+TEST_F(LibYUVConvertTest, TestRGB24ToJ420) {
+  const int kSize = 256;
+  align_buffer_page_end(orig_rgb24, kSize * 3 * 2);  // 2 rows of RGB24
+  align_buffer_page_end(dest_j420, kSize * 3 / 2 * 2);
+  int iterations256 = (benchmark_width_ * benchmark_height_ + (kSize * 2 - 1)) /
+                      (kSize * 2) * benchmark_iterations_;
+
+  for (int i = 0; i < kSize * 3 * 2; ++i) {
+    orig_rgb24[i] = i;
+  }
+
+  for (int i = 0; i < iterations256; ++i) {
+    RGB24ToJ420(orig_rgb24, kSize * 3,
+                dest_j420, kSize,                      // Y plane
+                dest_j420 + kSize * 2, kSize / 2,      // U plane
+                dest_j420 + kSize * 5 / 2, kSize / 2,  // V plane
+                kSize, 2);
+  }
+
+  uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381);
+  EXPECT_EQ(2755440272u, checksum);
+
+  free_aligned_buffer_page_end(orig_rgb24);
+  free_aligned_buffer_page_end(dest_j420);
+}
+#endif
+
 // Test I400 with jpeg matrix is same as J400
 TEST_F(LibYUVConvertTest, TestI400) {
   const int kSize = 256;