Casting for scale functions

- Added MT2T support for source strides; only positive stride values are supported.
- Reduced casting in row_common to one cast per assignment.
- Scaling functions use intptr_t for intermediate calculations, then cast strides to ptrdiff_t (see the sketch below).
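
A minimal sketch of the casting pattern described above (illustrative; not code from this commit). The int stride is widened through intptr_t so the multiply happens at pointer width, then narrowed to ptrdiff_t only where a row-stride parameter requires it:

#include <stddef.h>
#include <stdint.h>

// Widen before multiplying so the product cannot overflow 32-bit int.
static const uint8_t* AdvanceRows(const uint8_t* src, int src_stride, int rows) {
  return src + (intptr_t)src_stride * rows;
}

// Compute at pointer width, then narrow to the ptrdiff_t parameter type,
// mirroring the row_stride computations in the scale diffs below.
static ptrdiff_t RowStride(int dy_whole, int src_stride) {
  return (ptrdiff_t)(dy_whole * (intptr_t)src_stride);
}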

Bug: libyuv:948, b/257266635, b/262468594
Change-Id: I0409a0ce916b777da2a01c0ab0b56dccefed3b33
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4102203
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Ernest Hua <ernesthua@google.com>
Frank Barchard 2022-12-15 14:11:52 -08:00 committed by libyuv LUCI CQ
parent 610e0cdead
commit 3abd6f36b6
12 changed files with 694 additions and 615 deletions


@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1852
Version: 1854
License: BSD
License File: LICENSE


@ -163,10 +163,13 @@ int MM21ToYUY2(const uint8_t* src_y,
int height);
// Convert MT2T to P010
// Note that src_y and src_uv point to packed 10-bit values, so each plane
// occupies 10 / 8 bytes per sample. For this reason, src_stride_y and
// src_stride_uv are given in bytes.
LIBYUV_API
int MT2TToP010(const uint16_t* src_y,
int MT2TToP010(const uint8_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
const uint8_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
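
A hypothetical helper (name and signature are assumptions, not part of the API) restating the sizing rule from the comment above: MT2T packs 10-bit samples, so a plane of width x height samples occupies width * height * 10 / 8 bytes.

#include <stddef.h>

static size_t MT2TPlaneBytes(int width, int height) {
  // Packed 10-bit data: 10 / 8 bytes per sample.
  return (size_t)width * height * 10 / 8;
}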


@ -2123,8 +2123,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size);
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size);
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size);
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size);
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,


@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1852
#define LIBYUV_VERSION 1854
#endif // INCLUDE_LIBYUV_VERSION_H_


@ -735,12 +735,10 @@ int MM21ToYUY2(const uint8_t* src_y,
// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
// documentation.
// TODO(greenjustin): Add an MT2T to I420 conversion.
// TODO(greenjustin): Investigate if there are valid stride parameters other
// than width.
LIBYUV_API
int MT2TToP010(const uint16_t* src_y,
int MT2TToP010(const uint8_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
const uint8_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
@ -748,48 +746,75 @@ int MT2TToP010(const uint16_t* src_y,
int dst_stride_uv,
int width,
int height) {
if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) {
if (width <= 0 || !height || !src_uv || !dst_uv) {
return -1;
}
// TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may
// not be semantically meaningful in this format, but we do not have samples
// of unaligned data to conclude that yet. This format is 16x32 tiled, so we
// must pad the width and height to reflect that.
int aligned_width = (width + 15) & ~15;
int aligned_height = (height + 31) & ~31;
{
size_t y_size = aligned_width * aligned_height * 10 / 8;
size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8;
size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t);
size_t tmp_uv_size =
aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t);
void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) =
int u_width = (width + 1) / 2;
int uv_width = 2 * u_width;
int y = 0;
int uv_height = (height + 1) / 2;
const int tile_width = 16;
const int y_tile_height = 32;
const int uv_tile_height = 16;
int padded_width = (width + tile_width - 1) & ~(tile_width - 1);
int y_tile_row_size = padded_width * y_tile_height * 10 / 8;
int uv_tile_row_size = padded_width * uv_tile_height * 10 / 8;
size_t row_buf_size = padded_width * y_tile_height * sizeof(uint16_t);
void (*UnpackMT2T)(const uint8_t* src, uint16_t* dst, size_t size) =
UnpackMT2T_C;
align_buffer_64(tmp_y, tmp_y_size);
align_buffer_64(tmp_uv, tmp_uv_size);
align_buffer_64(row_buf, row_buf_size);
#if defined(HAS_UNPACKMT2T_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
UnpackMT2T = UnpackMT2T_NEON;
}
#endif
// Negative height means invert the image.
if (height < 0) {
height = -height;
uv_height = (height + 1) / 2;
if (dst_y) {
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_stride_y = -dst_stride_y;
}
dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv;
dst_stride_uv = -dst_stride_uv;
}
// TODO(greenjustin): Unpack and detile in rows rather than planes to keep
// the caches hot.
UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size);
UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size);
// Unpack and detile Y in rows of tiles
if (src_y && dst_y) {
for (y = 0; y < (height & ~(y_tile_height - 1)); y += y_tile_height) {
UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
width, y_tile_height, y_tile_height);
src_y += src_stride_y * y_tile_height;
dst_y += dst_stride_y * y_tile_height;
}
if (height & (y_tile_height - 1)) {
UnpackMT2T(src_y, (uint16_t*)row_buf, y_tile_row_size);
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_y, dst_stride_y,
width, height & (y_tile_height - 1), y_tile_height);
}
}
DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width,
height, 32);
DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv,
width, (height + 1) / 2, 16);
free_aligned_buffer_64(tmp_y);
free_aligned_buffer_64(tmp_uv);
// Unpack and detile UV plane
for (y = 0; y < (uv_height & ~(uv_tile_height - 1)); y += uv_tile_height) {
UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
uv_width, uv_tile_height, uv_tile_height);
src_uv += src_stride_uv * uv_tile_height;
dst_uv += dst_stride_uv * uv_tile_height;
}
if (uv_height & (uv_tile_height - 1)) {
UnpackMT2T(src_uv, (uint16_t*)row_buf, uv_tile_row_size);
DetilePlane_16((uint16_t*)row_buf, padded_width, dst_uv, dst_stride_uv,
uv_width, uv_height & (uv_tile_height - 1),
uv_tile_height);
}
free_aligned_buffer_64(row_buf);
}
return 0;
}
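
A hedged usage sketch of the new signature (buffers and dimensions assumed to exist; the stride choice assumes width is already a multiple of the 16-pixel tile width). Source pointers are now uint8_t* because MT2T bytes hold packed 10-bit data, and source strides are byte counts at 10 / 8 bytes per pixel; the tests below pass destination strides in uint16_t elements:

// Sketch only - not code from this commit.
int src_stride = width * 10 / 8;  // bytes per row of packed 10-bit samples
int r = MT2TToP010(src_y, src_stride, src_uv, src_stride, dst_y, width,
                   dst_uv, width, width, height);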

File diff suppressed because it is too large.


@ -720,9 +720,9 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
const uint16_t* src_lower_bits = src;
const uint16_t* src_upper_bits = src + 8;
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
const uint8_t* src_lower_bits = src;
const uint8_t* src_upper_bits = src + 16;
asm volatile(
"1: \n"
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper


@ -751,9 +751,9 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
// Unpack MT2T into tiled P010 64 pixels at a time. See
// tinyurl.com/mtk-10bit-video-format for format documentation.
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
const uint16_t* src_lower_bits = src;
const uint16_t* src_upper_bits = src + 8;
void UnpackMT2T_NEON(const uint8_t* src, uint16_t* dst, size_t size) {
const uint8_t* src_lower_bits = src;
const uint8_t* src_upper_bits = src + 16;
asm volatile(
"1: \n"
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"


@ -775,9 +775,9 @@ static void ScaleAddCols2_C(int dst_width,
int ix = x >> 16;
x += dx;
boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ =
SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
16;
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + ix) *
scaletbl[boxwidth - minboxwidth] >>
16);
}
}
@ -814,7 +814,7 @@ static void ScaleAddCols0_C(int dst_width,
(void)dx;
src_ptr += (x >> 16);
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = src_ptr[i] * scaleval >> 16;
*dst_ptr++ = (uint8_t)(src_ptr[i] * scaleval >> 16);
}
}
@ -829,7 +829,7 @@ static void ScaleAddCols1_C(int dst_width,
int i;
x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
*dst_ptr++ = (uint8_t)(SumPixels(boxwidth, src_ptr + x) * scaleval >> 16);
x += boxwidth;
}
}
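
The new (uint8_t) casts above apply the "one cast per assignment" rule from the commit message. The arithmetic itself is 16.16 fixed point: reading the code above, scaletbl holds 16.16 reciprocals of the box width, so the average needs no divide. A small worked example with illustrative values:

uint32_t sum = 100 + 120 + 140;              // SumPixels over a 3-pixel box
uint32_t scale = 65536u / 3;                 // 16.16 reciprocal of boxwidth
uint8_t avg = (uint8_t)(sum * scale >> 16);  // 119: truncated average of 120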


@ -58,9 +58,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@ -162,7 +162,7 @@ static void ScaleARGBDown4Box(int src_width,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@ -214,7 +214,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * (int64_t)src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@ -222,7 +222,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * (int64_t)src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@ -388,7 +388,7 @@ static void ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@ -545,7 +545,7 @@ static void ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_argb + yi * (int64_t)src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -570,7 +570,7 @@ static void ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * (int64_t)src_stride;
src = src_argb + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -793,9 +793,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
const uint8_t* src_row_y = src_y + yi * (int64_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -833,9 +833,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width,
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
src_row_y = src_y + yi * (int64_t)src_stride_y;
src_row_u = src_u + uv_yi * (int64_t)src_stride_u;
src_row_v = src_v + uv_yi * (int64_t)src_stride_v;
src_row_y = src_y + yi * (intptr_t)src_stride_y;
src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
@ -926,7 +926,7 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (int64_t)src_stride,
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
@ -962,7 +962,7 @@ static void ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -977,7 +977,7 @@ static void ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (int64_t)src_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1011,7 +1011,7 @@ static void ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 4,
ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
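
All of these scalers step through the source with 16.16 fixed-point coordinates: x >> 16 is the integer column, x & 0xffff the fraction, and dx the per-destination-pixel step. A minimal nearest-neighbor sketch (src, dst and the widths are assumed declared; the dx formula is the usual ratio, not code from this file):

int x = 0;                                               // 16.16 source x
int dx = (int)(((int64_t)src_width << 16) / dst_width);  // step per dst pixel
int i;
for (i = 0; i < dst_width; ++i) {
  dst[i] = src[x >> 16];  // nearest-neighbor sample at the integer column
  x += dx;
}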


@ -83,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
} else {
src_uv += (y >> 16) * (int64_t)src_stride + ((x >> 16) - 1) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@ -200,7 +200,7 @@ static void ScaleUVDown4Box(int src_width,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
@ -263,7 +263,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * (int64_t)src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@ -271,7 +271,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_uv += (y >> 16) * (int64_t)src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@ -429,7 +429,7 @@ static void ScaleUVBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@ -571,7 +571,7 @@ static void ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_uv + yi * (int64_t)src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
// Allocate 2 rows of UV.
const int row_size = (dst_width * 2 + 15) & ~15;
@ -596,7 +596,7 @@ static void ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_uv + yi * (int64_t)src_stride;
src = src_uv + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -663,13 +663,13 @@ void ScaleUVLinearUp2(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -770,13 +770,13 @@ void ScaleUVLinearUp2_16(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride, dst_uv,
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * (int64_t)src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -889,7 +889,7 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (int64_t)src_stride, dst_width, x,
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
@ -910,7 +910,7 @@ static int UVCopy(const uint8_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -930,7 +930,7 @@ static int UVCopy_16(const uint16_t* src_uv,
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -968,7 +968,7 @@ static void ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (int64_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -983,7 +983,7 @@ static void ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (int64_t)src_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
@ -1024,7 +1024,7 @@ static void ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
UVCopy(src + (y >> 16) * (int64_t)src_stride + (x >> 16) * 2,
UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return;
}
@ -1118,7 +1118,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src_uv = src_uv + (src_height - 1) * (int64_t)src_stride_uv;
src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@ -1126,13 +1126,13 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
UVCopy_16(src_uv + ((src_height - 1) / 2) * (int64_t)src_stride_uv,
UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
UVCopy_16(src_uv + ((dy - 1) / 2) * (int64_t)src_stride_uv,
dy * (int64_t)src_stride_uv, dst_uv, dst_stride_uv, dst_width,
dst_height);
UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
(int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
dst_width, dst_height);
}
return 0;


@ -417,151 +417,136 @@ TESTPLANARTOBP(I210, uint16_t, 2, 2, 1, P210, uint16_t, 2, 2, 1, 10)
TESTPLANARTOBP(I012, uint16_t, 2, 2, 2, P012, uint16_t, 2, 2, 2, 12)
TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12)
#define TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
DOY, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
const int kPaddedHeight = \
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end( \
src_uv, \
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_c, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_opt, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
src_y_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2; ++i) { \
src_uv_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, 2 * kSrcHalfWidth, \
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
} \
if (DOY) { \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
} \
for (int i = 0; i < kDstHalfHeight; ++i) { \
for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
} \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
#define TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, W1280, N, NEG, OFF, DOY, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
static_assert(SRC_SUBSAMP_X == 1 || SRC_SUBSAMP_X == 2, \
"SRC_SUBSAMP_X unsupported"); \
static_assert(SRC_SUBSAMP_Y == 1 || SRC_SUBSAMP_Y == 2, \
"SRC_SUBSAMP_Y unsupported"); \
static_assert(DST_SUBSAMP_X == 1 || DST_SUBSAMP_X == 2, \
"DST_SUBSAMP_X unsupported"); \
static_assert(DST_SUBSAMP_Y == 1 || DST_SUBSAMP_Y == 2, \
"DST_SUBSAMP_Y unsupported"); \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kSrcHalfWidth = SUBSAMPLE(kWidth, SRC_SUBSAMP_X); \
const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \
const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \
const int kPaddedWidth = (kWidth + (TILE_WIDTH - 1)) & ~(TILE_WIDTH - 1); \
const int kPaddedHeight = \
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end( \
src_uv, \
2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_c, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_uv_opt, \
2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; \
i < kPaddedWidth * kPaddedHeight * SRC_BPC / (int)sizeof(SRC_T); \
++i) { \
src_y_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
for (int i = 0; i < kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * 2 * \
SRC_BPC / (int)sizeof(SRC_T); \
++i) { \
src_uv_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
DOY ? reinterpret_cast<DST_T*>(dst_y_c) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_c), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \
2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \
DOY ? reinterpret_cast<DST_T*>(dst_y_opt) : NULL, kWidth, \
reinterpret_cast<DST_T*>(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \
NEG kHeight); \
} \
if (DOY) { \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
} \
for (int i = 0; i < kDstHalfHeight; ++i) { \
for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \
EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \
dst_uv_opt[i * 2 * kDstHalfWidth + j]); \
} \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_uv_c); \
free_aligned_buffer_page_end(dst_y_opt); \
free_aligned_buffer_page_end(dst_uv_opt); \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
}
#define TESTBIPLANARTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, 1, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, 1, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, 1, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _NullY, +, 0, 0, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
#define TESTBPTOBP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_ + 1, _Any, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Unaligned, +, 2, 1, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Invert, -, 0, 1, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Opt, +, 0, 1, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOBPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _NullY, +, 0, 0, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT)
TESTBIPLANARTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBIPLANARTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBIPLANARTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOBP(NV21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV12Mirror, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOBP(NV12, uint8_t, 1, 2, 2, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBPTOBP(NV16, uint8_t, 1, 2, 1, NV24, uint8_t, 1, 1, 1, 8, 1, 1)
TESTBPTOBP(P010, uint16_t, 2, 2, 2, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P210, uint16_t, 2, 2, 1, P410, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P012, uint16_t, 2, 2, 2, P412, uint16_t, 2, 1, 1, 10, 1, 1)
TESTBPTOBP(P212, uint16_t, 2, 2, 1, P412, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBPTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOBP(MT2T, uint8_t, 10 / 8, 2, 2, P010, uint16_t, 2, 2, 2, 10, 16, 32)
// TODO(greenjustin): Test all variants.
TESTBIPLANARTOBPI(MT2T,
uint16_t,
2,
2,
2,
P010,
uint16_t,
2,
2,
2,
benchmark_width_,
_Opt,
+,
0,
1,
10,
16,
32)
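
Note on the new MT2T test above: SRC_BPC is passed as 10 / 8, which alone would truncate to 1, but the macro pastes it unparenthesized into its size and stride expressions, so they group left to right into the correct packed byte counts:

// With SRC_BPC = 10 / 8 and SRC_T = uint8_t, the stride argument
//   kWidth * SRC_BPC / (int)sizeof(SRC_T)
// expands to
//   kWidth * 10 / 8 / (int)sizeof(uint8_t)
// which groups as ((kWidth * 10) / 8) / 1 - bytes of packed 10-bit data.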
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, W1280, N, NEG, OFF, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TEST_F(LibYUVConvertTest, SRC_FMT_PLANAR##To##FMT_PLANAR##N) { \
static_assert(SRC_BPC == 1 || SRC_BPC == 2, "SRC BPC unsupported"); \
static_assert(DST_BPC == 1 || DST_BPC == 2, "DST BPC unsupported"); \
@ -641,30 +626,28 @@ TESTBIPLANARTOBPI(MT2T,
free_aligned_buffer_page_end(src_uv); \
}
#define TESTBIPLANARTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Unaligned, +, 2, \
SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Invert, -, 0, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT) \
TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, benchmark_width_, _Opt, +, 0, SRC_DEPTH, \
TILE_WIDTH, TILE_HEIGHT)
#define TESTBPTOP(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
DST_SUBSAMP_Y, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT) \
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_ + 1, _Any, +, 0, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Unaligned, +, 2, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Invert, -, 0, SRC_DEPTH, TILE_WIDTH, \
TILE_HEIGHT) \
TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, SRC_SUBSAMP_Y, \
FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, DST_SUBSAMP_Y, \
benchmark_width_, _Opt, +, 0, SRC_DEPTH, TILE_WIDTH, TILE_HEIGHT)
TESTBIPLANARTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBIPLANARTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
TESTBPTOP(NV12, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOP(NV21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 1, 1)
TESTBPTOP(MM21, uint8_t, 1, 2, 2, I420, uint8_t, 1, 2, 2, 8, 16, 32)
// Provide matrix wrappers for full range bt.709
#define F420ToABGR(a, b, c, d, e, f, g, h, i, j) \
@ -1089,8 +1072,8 @@ TESTQPLANARTOB(I420Alpha, 2, 2, ARGBFilter, 4, 4, 1)
TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#endif
#define TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, \
BPP_B, W1280, N, NEG, OFF) \
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@ -1143,15 +1126,15 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
free_aligned_buffer_page_end(dst_argb32_opt); \
}
#define TESTBIPLANARTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_ + 1, _Any, +, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, _Unaligned, +, 2) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, _Invert, -, 0) \
TESTBIPLANARTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, _Opt, +, 0)
#define TESTBPTOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B) \
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_ + 1, _Any, +, 0) \
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, _Unaligned, +, 2) \
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, _Invert, -, 0) \
TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
benchmark_width_, _Opt, +, 0)
#define JNV12ToARGB(a, b, c, d, e, f, g, h) \
NV12ToARGBMatrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
@ -1172,29 +1155,29 @@ TESTQPLANARTOB(I422Alpha, 2, 1, ARGBFilter, 4, 4, 1)
#define JNV12ToRGB565(a, b, c, d, e, f, g, h) \
NV12ToRGB565Matrix(a, b, c, d, e, f, &kYuvJPEGConstants, g, h)
TESTBIPLANARTOB(JNV12, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(JNV21, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(JNV12, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(JNV21, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(JNV12, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(JNV21, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(JNV12, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(JNV21, 2, 2, RAW, RAW, 3)
TESTBPTOB(JNV12, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(JNV21, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(JNV12, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(JNV21, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(JNV12, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(JNV21, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(JNV12, 2, 2, RAW, RAW, 3)
TESTBPTOB(JNV21, 2, 2, RAW, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANARTOB(JNV12, 2, 2, RGB565, RGB565, 2)
TESTBPTOB(JNV12, 2, 2, RGB565, RGB565, 2)
#endif
TESTBIPLANARTOB(NV12, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(NV21, 2, 2, ARGB, ARGB, 4)
TESTBIPLANARTOB(NV12, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(NV21, 2, 2, ABGR, ABGR, 4)
TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3)
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3)
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3)
TESTBPTOB(NV12, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(NV21, 2, 2, ARGB, ARGB, 4)
TESTBPTOB(NV12, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(NV21, 2, 2, ABGR, ABGR, 4)
TESTBPTOB(NV12, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(NV21, 2, 2, RGB24, RGB24, 3)
TESTBPTOB(NV12, 2, 2, RAW, RAW, 3)
TESTBPTOB(NV21, 2, 2, RAW, RAW, 3)
TESTBPTOB(NV21, 2, 2, YUV24, RAW, 3)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2)
TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
#endif
#define TESTATOPLANARI(FMT_A, BPP_A, YALIGN, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
@ -1289,8 +1272,8 @@ TESTATOPLANAR(UYVY, 2, 1, I422, 2, 1)
TESTATOPLANAR(YUY2, 2, 1, I420, 2, 2)
TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
#define TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
#define TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_PLANAR##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
@ -1336,25 +1319,25 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1)
free_aligned_buffer_page_end(src_argb); \
}
#define TESTATOBIPLANAR(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_ + 1, _Any, +, 0) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Unaligned, +, 2) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Invert, -, 0) \
TESTATOBIPLANARI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Opt, +, 0)
#define TESTATOBP(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y) \
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_ + 1, _Any, +, 0) \
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Unaligned, +, 2) \
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Invert, -, 0) \
TESTATOBPI(FMT_A, SUB_A, BPP_A, FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, \
benchmark_width_, _Opt, +, 0)
TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2)
TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2)
TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)
TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
TESTATOBP(ARGB, 1, 4, NV12, 2, 2)
TESTATOBP(ARGB, 1, 4, NV21, 2, 2)
TESTATOBP(ABGR, 1, 4, NV12, 2, 2)
TESTATOBP(ABGR, 1, 4, NV21, 2, 2)
TESTATOBP(RAW, 1, 3, JNV21, 2, 2)
TESTATOBP(YUY2, 2, 4, NV12, 2, 2)
TESTATOBP(UYVY, 2, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV12, 2, 2)
TESTATOBP(AYUV, 1, 4, NV21, 2, 2)
#define TESTATOBI(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, TYPE_B, \
EPP_B, STRIDE_B, HEIGHT_B, W1280, N, NEG, OFF) \
@ -3935,8 +3918,8 @@ TESTQPLANAR16TOB(I010Alpha, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
#endif // DISABLE_SLOW_TESTS
#define TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
#define TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, W1280, N, NEG, SOFF, DOFF, S_DEPTH) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \
@ -3979,16 +3962,16 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTBIPLANAR16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, \
ALIGN, YALIGN, S_DEPTH) \
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
TESTBIPLANAR16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define TESTBP16TOB(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
YALIGN, S_DEPTH) \
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
benchmark_width_ + 1, _Any, +, 0, 0, S_DEPTH) \
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
benchmark_width_, _Unaligned, +, 4, 4, S_DEPTH) \
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
benchmark_width_, _Invert, -, 0, 0, S_DEPTH) \
TESTBP16TOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, YALIGN, \
benchmark_width_, _Opt, +, 0, 0, S_DEPTH)
#define P010ToARGB(a, b, c, d, e, f, g, h) \
P010ToARGBMatrix(a, b, c, d, e, f, &kYuvH709Constants, g, h)
@ -4031,23 +4014,23 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
kFilterBilinear)
#if !defined(DISABLE_SLOW_TESTS) || defined(__x86_64__) || defined(__i386__)
TESTBIPLANAR16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
TESTBP16TOB(P010, 2, 2, ARGB, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, ARGB, 4, 4, 1, 10)
TESTBP16TOB(P012, 2, 2, ARGB, 4, 4, 1, 12)
TESTBP16TOB(P212, 2, 1, ARGB, 4, 4, 1, 12)
TESTBP16TOB(P016, 2, 2, ARGB, 4, 4, 1, 16)
TESTBP16TOB(P216, 2, 1, ARGB, 4, 4, 1, 16)
TESTBP16TOB(P010, 2, 2, ARGBFilter, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, ARGBFilter, 4, 4, 1, 10)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTBIPLANAR16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBIPLANAR16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
TESTBIPLANAR16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
TESTBIPLANAR16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
TESTBP16TOB(P010, 2, 2, AR30, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, AR30, 4, 4, 1, 10)
TESTBP16TOB(P012, 2, 2, AR30, 4, 4, 1, 12)
TESTBP16TOB(P212, 2, 1, AR30, 4, 4, 1, 12)
TESTBP16TOB(P016, 2, 2, AR30, 4, 4, 1, 16)
TESTBP16TOB(P216, 2, 1, AR30, 4, 4, 1, 16)
TESTBP16TOB(P010, 2, 2, AR30Filter, 4, 4, 1, 10)
TESTBP16TOB(P210, 2, 1, AR30Filter, 4, 4, 1, 10)
#endif // LITTLE_ENDIAN_ONLY_TEST
#endif // DISABLE_SLOW_TESTS