MT2T Warning fixes for fuchsia

Bug: b/258474032, b/257266635
Change-Id: Ic5cbbc60e2e1463361e359a2fe3e97976c1ea929
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4081348
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
Authored by Frank Barchard on 2022-12-05 16:10:38 -08:00; committed by libyuv LUCI CQ
parent c19943b4d0
commit 610e0cdead
14 changed files with 252 additions and 25 deletions

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1850
Version: 1852
License: BSD
License File: LICENSE

@ -162,6 +162,19 @@ int MM21ToYUY2(const uint8_t* src_y,
int width,
int height);
// Convert MT2T to P010
LIBYUV_API
int MT2TToP010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height);
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,
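A minimal usage sketch for the MT2TToP010 declaration added above (illustrative only, not part of this change; the wrapper name and buffers are hypothetical, the libyuv namespace and header path are assumed from libyuv's usual layout, and passing width for every stride follows the TODO in the implementation further down):

    // Hypothetical caller: convert one MT2T frame to P010.
    // Assumes the declaration above lives in libyuv/convert.h and that the
    // source planes are already padded to the 16x32 tile size.
    #include "libyuv/convert.h"

    int ConvertMt2tFrame(const uint16_t* mt2t_y, const uint16_t* mt2t_uv,
                         uint16_t* p010_y, uint16_t* p010_uv,
                         int width, int height) {
      // Returns 0 on success, -1 on bad arguments.
      return libyuv::MT2TToP010(mt2t_y, width, mt2t_uv, width, p010_y, width,
                                p010_uv, width, width, height);
    }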

@ -455,6 +455,7 @@ extern "C" {
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
#define HAS_DETILETOYUY2_NEON
#define HAS_UNPACKMT2T_NEON
#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
@ -2122,6 +2123,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size);
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size);
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1850
#define LIBYUV_VERSION 1852
#endif // INCLUDE_LIBYUV_VERSION_H_

@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
}
#endif
while (count >= (uint64_t)(kBlockSize)) {
while (count >= (uint64_t)kBlockSize) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
(sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
if (ssim_d == 0.0) {
if (ssim_d == 0) {
return DBL_MAX;
}
return ssim_n * 1.0 / ssim_d;
return (double)ssim_n / (double)ssim_d;
}
}

@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
:
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
return static_cast<uint32_t>(diff);
return (uint32_t)(diff);
}
#else
uint32_t HammingDistance_SSE42(const uint8_t* src_a,

@ -732,6 +732,67 @@ int MM21ToYUY2(const uint8_t* src_y,
return 0;
}
// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
// documentation.
// TODO(greenjustin): Add an MT2T to I420 conversion.
// TODO(greenjustin): Investigate if there are valid stride parameters other
// than width.
LIBYUV_API
int MT2TToP010(const uint16_t* src_y,
int src_stride_y,
const uint16_t* src_uv,
int src_stride_uv,
uint16_t* dst_y,
int dst_stride_y,
uint16_t* dst_uv,
int dst_stride_uv,
int width,
int height) {
if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) {
return -1;
}
// TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may
// not be semantically meaningful in this format, but we do not have samples
// of unaligned data to conclude that yet. This format is 16x32 tiled, so we
// must pad the width and height to reflect that.
int aligned_width = (width + 15) & ~15;
int aligned_height = (height + 31) & ~31;
{
size_t y_size = aligned_width * aligned_height * 10 / 8;
size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8;
size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t);
size_t tmp_uv_size =
aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t);
void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) =
UnpackMT2T_C;
align_buffer_64(tmp_y, tmp_y_size);
align_buffer_64(tmp_uv, tmp_uv_size);
#if defined(HAS_UNPACKMT2T_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
UnpackMT2T = UnpackMT2T_NEON;
}
#endif
// TODO(greenjustin): Unpack and detile in rows rather than planes to keep
// the caches hot.
UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size);
UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size);
DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width,
height, 32);
DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv,
width, (height + 1) / 2, 16);
free_aligned_buffer_64(tmp_y);
free_aligned_buffer_64(tmp_uv);
}
return 0;
}
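To make the 10/8 packing factor and the 16x32 alignment above concrete, here is a small sanity check of the intermediate buffer sizes for a hypothetical 1280x720 frame; the expressions simply repeat the ones inside MT2TToP010:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Illustrative only: the size arithmetic from MT2TToP010 for 1280x720.
    void CheckMt2tBufferSizes() {
      const int width = 1280;                          // already a multiple of 16
      const int height = 720;                          // padded up to 736
      const int aligned_width = (width + 15) & ~15;    // 1280
      const int aligned_height = (height + 31) & ~31;  // 736
      // Packed MT2T input: 10 bits per sample.
      const size_t y_size = aligned_width * aligned_height * 10 / 8;
      const size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8;
      // Unpacked intermediate plane: 16 bits per sample.
      const size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t);
      assert(y_size == 1177600);      // 1280 * 736 * 10 / 8
      assert(uv_size == 588800);      // 1280 * 368 * 10 / 8
      assert(tmp_y_size == 1884160);  // 1280 * 736 * 2
    }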
#ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height

@ -109,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
}
buf_.data = src;
buf_.len = static_cast<int>(src_len);
buf_.len = (int)src_len;
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
@ -428,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
jpeg_source_mgr* src = cinfo->src;
size_t bytes = static_cast<size_t>(num_bytes);
size_t bytes = (size_t)num_bytes;
if (bytes > src->bytes_in_buffer) {
src->next_input_byte = nullptr;
src->bytes_in_buffer = 0;

@ -3196,6 +3196,7 @@ int RAWToRGB24(const uint8_t* src_raw,
return 0;
}
// TODO(fbarchard): Consider uint8_t value
LIBYUV_API
void SetPlane(uint8_t* dst_y,
int dst_stride_y,
@ -3256,7 +3257,7 @@ void SetPlane(uint8_t* dst_y,
// Set plane
for (y = 0; y < height; ++y) {
SetRow(dst_y, value, width);
SetRow(dst_y, (uint8_t)value, width);
dst_y += dst_stride_y;
}
}

@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
"4: \n"
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst), // %2
"+r"(width) // %3
: "r"(&kVTbl4x4Transpose), // %4
"r"((ptrdiff_t)src_stride), // %5
"r"((ptrdiff_t)dst_stride) // %6
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
}
@ -423,15 +423,15 @@ void TransposeUVWx8_NEON(const uint8_t* src,
"4: \n"
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width) // %4
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "=&r"(src_temp), // %0
"+r"(src), // %1
"+r"(dst_a), // %2
"+r"(dst_b), // %3
"+r"(width) // %4
: "r"((ptrdiff_t)src_stride), // %5
"r"((ptrdiff_t)dst_stride_a), // %6
"r"((ptrdiff_t)dst_stride_b), // %7
"r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
}

@ -2801,6 +2801,33 @@ void DetileToYUY2_C(const uint8_t* src_y,
}
}
// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
// in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
// block contain all of the lower 2 bits of each pixel packed together, and the
// next 64 bytes represent all the upper 8 bits of the pixel.
void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size) {
for (size_t i = 0; i < size; i += 80) {
const uint8_t* src_lower_bits = (uint8_t*)src;
const uint8_t* src_upper_bits = ((uint8_t*)src) + 16;
for (int j = 0; j < 16; j++) {
uint8_t lower_bits = src_lower_bits[j];
*dst++ = (lower_bits & 0x03) << 6 | (uint16_t)src_upper_bits[j * 4] << 8 |
(uint16_t)src_upper_bits[j * 4] >> 2;
*dst++ = (lower_bits & 0x0C) << 4 |
(uint16_t)src_upper_bits[j * 4 + 1] << 8 |
(uint16_t)src_upper_bits[j * 4 + 1] >> 2;
*dst++ = (lower_bits & 0x30) << 2 |
(uint16_t)src_upper_bits[j * 4 + 2] << 8 |
(uint16_t)src_upper_bits[j * 4 + 2] >> 2;
*dst++ = (lower_bits & 0xC0) | (uint16_t)src_upper_bits[j * 4 + 3] << 8 |
(uint16_t)src_upper_bits[j * 4 + 3] >> 2;
}
src += 40;
}
}
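A worked example of the per-pixel bit manipulation above, assuming a lower-bits byte of 0x03 and an upper byte of 0xAB (values chosen only for illustration): the 10-bit sample 0x2AF lands in the top 10 bits of the output, and its top 6 bits are replicated into the low 6 bits, which is the same rounding trick the NEON versions implement with vsri/sri:

    #include <cassert>
    #include <cstdint>

    // Illustrative only: the first-pixel expression from UnpackMT2T_C.
    void CheckMt2tBitMath() {
      const uint8_t lower_bits = 0x03;  // packed 2-bit LSBs of four pixels
      const uint8_t upper = 0xAB;       // 8 MSBs of the first pixel
      const uint16_t out = (uint16_t)((lower_bits & 0x03) << 6 |
                                      (uint16_t)upper << 8 | upper >> 2);
      // (0x2AF << 6) | (0x2AF >> 4) == 0xABC0 | 0x2A == 0xABEA
      assert(out == 0xABEA);
    }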
void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,

@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
const uint16_t* src_lower_bits = src;
const uint16_t* src_upper_bits = src + 8;
asm volatile(
"1: \n"
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper
// bits.
"vld1.8 {d6}, [%0]! \n" // Load 8 bytes of lower
// bits.
"vshl.u8 d4, d6, #2 \n" // Align lower bits.
"vshl.u8 d2, d6, #4 \n"
"vshl.u8 d0, d6, #6 \n"
"vzip.u8 d0, d1 \n" // Zip lower and upper
// bits together.
"vzip.u8 d2, d3 \n"
"vzip.u8 d4, d5 \n"
"vzip.u8 d6, d7 \n"
"vsri.u16 q0, q0, #10 \n" // Copy upper 6 bits into
// lower 6 bits for better
// accuracy in
// conversions.
"vsri.u16 q1, q1, #10 \n"
"vsri.u16 q2, q2, #10 \n"
"vsri.u16 q3, q3, #10 \n"
"vst4.16 {d0, d2, d4, d6}, [%2]! \n" // Store 32 pixels
"vst4.16 {d1, d3, d5, d7}, [%2]! \n"
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Process last 32 pixels
// in the block
"vld1.8 {d6}, [%0]! \n"
"vshl.u8 d4, d6, #2 \n"
"vshl.u8 d2, d6, #4 \n"
"vshl.u8 d0, d6, #6 \n"
"vzip.u8 d0, d1 \n"
"vzip.u8 d2, d3 \n"
"vzip.u8 d4, d5 \n"
"vzip.u8 d6, d7 \n"
"vsri.u16 q0, q0, #10 \n"
"vsri.u16 q1, q1, #10 \n"
"vsri.u16 q2, q2, #10 \n"
"vsri.u16 q3, q3, #10 \n"
"vst4.16 {d0, d2, d4, d6}, [%2]! \n"
"vst4.16 {d1, d3, d5, d7}, [%2]! \n"
"mov %0, %1 \n"
"add %1, %0, #16 \n"
"subs %3, %3, #80 \n"
"bgt 1b \n"
: "+r"(src_lower_bits), // %0
"+r"(src_upper_bits), // %1
"+r"(dst), // %2
"+r"(size) // %3
:
: "cc", "memory", "q0", "q1", "q2", "q3");
}
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,

@ -749,6 +749,54 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
}
#endif
// Unpack MT2T into tiled P010 64 pixels at a time. See
// tinyurl.com/mtk-10bit-video-format for format documentation.
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
const uint16_t* src_lower_bits = src;
const uint16_t* src_upper_bits = src + 8;
asm volatile(
"1: \n"
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
"ld1 {v7.8b}, [%0], #8 \n"
"shl v6.8b, v7.8b, #2 \n"
"shl v5.8b, v7.8b, #4 \n"
"shl v4.8b, v7.8b, #6 \n"
"zip1 v0.16b, v4.16b, v0.16b \n"
"zip1 v1.16b, v5.16b, v1.16b \n"
"zip1 v2.16b, v6.16b, v2.16b \n"
"zip1 v3.16b, v7.16b, v3.16b \n"
"sri v0.8h, v0.8h, #10 \n"
"sri v1.8h, v1.8h, #10 \n"
"sri v2.8h, v2.8h, #10 \n"
"sri v3.8h, v3.8h, #10 \n"
"st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
"ld1 {v7.8b}, [%0], #8 \n"
"shl v6.8b, v7.8b, #2 \n"
"shl v5.8b, v7.8b, #4 \n"
"shl v4.8b, v7.8b, #6 \n"
"zip1 v0.16b, v4.16b, v0.16b \n"
"zip1 v1.16b, v5.16b, v1.16b \n"
"zip1 v2.16b, v6.16b, v2.16b \n"
"zip1 v3.16b, v7.16b, v3.16b \n"
"sri v0.8h, v0.8h, #10 \n"
"sri v1.8h, v1.8h, #10 \n"
"sri v2.8h, v2.8h, #10 \n"
"sri v3.8h, v3.8h, #10 \n"
"st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
"mov %0, %1 \n"
"add %1, %0, #16 \n"
"subs %3, %3, #80 \n"
"b.gt 1b \n"
: "+r"(src_lower_bits), // %0
"+r"(src_upper_bits), // %1
"+r"(dst), // %2
"+r"(size) // %3
:
: "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12");
}
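Since the C and NEON paths are meant to produce identical output, one way to exercise the new row functions in isolation is a per-block cross-check such as the sketch below (the header names, the libyuv namespace wrapping, and the caller-supplied input block are assumptions, not something this change adds):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    #include "libyuv/cpu_id.h"  // TestCpuFlag, kCpuHasNEON (assumed header)
    #include "libyuv/row.h"     // UnpackMT2T_C / UnpackMT2T_NEON

    // Illustrative only: compare one 80-byte MT2T block (64 pixels) between
    // the C and NEON unpackers.
    void CrossCheckUnpackMT2T(const uint16_t* block /* 80 bytes of MT2T */) {
      uint16_t expected[64];
      uint16_t actual[64];
      libyuv::UnpackMT2T_C(block, expected, 80);
    #if defined(HAS_UNPACKMT2T_NEON)
      if (libyuv::TestCpuFlag(libyuv::kCpuHasNEON)) {
        libyuv::UnpackMT2T_NEON(block, actual, 80);
        assert(std::memcmp(expected, actual, sizeof(expected)) == 0);
      }
    #endif
      (void)actual;
    }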
#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,

@ -538,6 +538,26 @@ TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
// TODO (greenjustin): Test all variants.
TESTBIPLANARTOBPI(MT2T,
uint16_t,
2,
2,
2,
P010,
uint16_t,
2,
2,
2,
benchmark_width_,
_Opt,
+,
0,
1,
10,
16,
32)
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \