mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
MT2T Warning fixes for fuchsia
Bug: b/258474032, b/257266635
Change-Id: Ic5cbbc60e2e1463361e359a2fe3e97976c1ea929
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/4081348
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Justin Green <greenjustin@google.com>
This commit is contained in:
parent
c19943b4d0
commit
610e0cdead
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1850
|
||||
Version: 1852
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -162,6 +162,19 @@ int MM21ToYUY2(const uint8_t* src_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert MT2T to P010.
// src_y / src_uv are the packed, tiled MT2T luma and chroma planes; dst_y /
// dst_uv receive the 16-bit P010 planes. width and height are in pixels.
// Returns 0 on success and -1 when an argument is invalid (a NULL plane
// pointer, or a width or height that is not positive).
|
||||
LIBYUV_API
|
||||
int MT2TToP010(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to NV21.
|
||||
LIBYUV_API
|
||||
int I422ToNV21(const uint8_t* src_y,
|
||||
|
||||
@ -455,6 +455,7 @@ extern "C" {
|
||||
#define HAS_DETILEROW_NEON
|
||||
#define HAS_DETILESPLITUVROW_NEON
|
||||
#define HAS_DETILETOYUY2_NEON
|
||||
#define HAS_UNPACKMT2T_NEON
|
||||
#define HAS_DIVIDEROW_16_NEON
|
||||
#define HAS_HALFFLOATROW_NEON
|
||||
#define HAS_HALFMERGEUVROW_NEON
|
||||
@ -2122,6 +2123,8 @@ void DetileToYUY2_Any_NEON(const uint8_t* src_y,
|
||||
ptrdiff_t src_uv_tile_stride,
|
||||
uint8_t* dst_yuy2,
|
||||
int width);
|
||||
void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size);
|
||||
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size);
|
||||
void MergeUVRow_C(const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_uv,
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1850
|
||||
#define LIBYUV_VERSION 1852
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -45,7 +45,7 @@ uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
|
||||
}
|
||||
#endif
|
||||
|
||||
while (count >= (uint64_t)(kBlockSize)) {
|
||||
while (count >= (uint64_t)kBlockSize) {
|
||||
seed = HashDjb2_SSE(src, kBlockSize, seed);
|
||||
src += kBlockSize;
|
||||
count -= kBlockSize;
|
||||
@ -359,10 +359,10 @@ static double Ssim8x8_C(const uint8_t* src_a,
|
||||
(sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
|
||||
|
||||
if (ssim_d == 0.0) {
|
||||
if (ssim_d == 0) {
|
||||
return DBL_MAX;
|
||||
}
|
||||
return ssim_n * 1.0 / ssim_d;
|
||||
return (double)ssim_n / (double)ssim_d;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -67,7 +67,7 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
:
|
||||
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
|
||||
|
||||
return static_cast<uint32_t>(diff);
|
||||
return (uint32_t)(diff);
|
||||
}
|
||||
#else
|
||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
|
||||
@ -732,6 +732,67 @@ int MM21ToYUY2(const uint8_t* src_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert MT2T into P010. See tinyurl.com/mtk-10bit-video-format for format
|
||||
// documentation.
|
||||
// TODO(greenjustin): Add an MT2T to I420 conversion.
|
||||
// TODO(greenjustin): Investigate if there are valid stride parameters other
|
||||
// than width.
|
||||
LIBYUV_API
|
||||
int MT2TToP010(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
if (width <= 0 || height <= 0 || !src_y || !src_uv || !dst_y || !dst_uv) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO(greenjustin): Investigate if we can allow arbitrary sizes. This may
|
||||
// not be semantically meaningful in this format, but we do not have samples
|
||||
// of unaligned data to conclude that yet. This format is 16x32 tiled, so we
|
||||
// must pad the width and height to reflect that.
|
||||
int aligned_width = (width + 15) & ~15;
|
||||
int aligned_height = (height + 31) & ~31;
|
||||
|
||||
{
|
||||
size_t y_size = aligned_width * aligned_height * 10 / 8;
|
||||
size_t uv_size = aligned_width * ((aligned_height + 1) / 2) * 10 / 8;
|
||||
size_t tmp_y_size = aligned_width * aligned_height * sizeof(uint16_t);
|
||||
size_t tmp_uv_size =
|
||||
aligned_width * ((aligned_height + 1) / 2) * sizeof(uint16_t);
|
||||
void (*UnpackMT2T)(const uint16_t* src, uint16_t* dst, size_t size) =
|
||||
UnpackMT2T_C;
|
||||
align_buffer_64(tmp_y, tmp_y_size);
|
||||
align_buffer_64(tmp_uv, tmp_uv_size);
|
||||
|
||||
#if defined(HAS_UNPACKMT2T_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
UnpackMT2T = UnpackMT2T_NEON;
|
||||
}
|
||||
#endif
|
||||
|
||||
// TODO(greenjustin): Unpack and detile in rows rather than planes to keep
|
||||
// the caches hot.
|
||||
UnpackMT2T(src_y, (uint16_t*)tmp_y, y_size);
|
||||
UnpackMT2T(src_uv, (uint16_t*)tmp_uv, uv_size);
|
||||
|
||||
DetilePlane_16((uint16_t*)tmp_y, src_stride_y, dst_y, dst_stride_y, width,
|
||||
height, 32);
|
||||
DetilePlane_16((uint16_t*)tmp_uv, src_stride_uv, dst_uv, dst_stride_uv,
|
||||
width, (height + 1) / 2, 16);
|
||||
|
||||
free_aligned_buffer_64(tmp_y);
|
||||
free_aligned_buffer_64(tmp_uv);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef I422TONV21_ROW_VERSION
|
||||
// Unittest fails for this version.
|
||||
// 422 chroma is 1/2 width, 1x height
|
||||
|
||||
@ -109,7 +109,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
|
||||
}
|
||||
|
||||
buf_.data = src;
|
||||
buf_.len = static_cast<int>(src_len);
|
||||
buf_.len = (int)src_len;
|
||||
buf_vec_.pos = 0;
|
||||
decompress_struct_->client_data = &buf_vec_;
|
||||
#ifdef HAVE_SETJMP
|
||||
@ -428,7 +428,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
|
||||
|
||||
void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
|
||||
jpeg_source_mgr* src = cinfo->src;
|
||||
size_t bytes = static_cast<size_t>(num_bytes);
|
||||
size_t bytes = (size_t)num_bytes;
|
||||
if (bytes > src->bytes_in_buffer) {
|
||||
src->next_input_byte = nullptr;
|
||||
src->bytes_in_buffer = 0;
|
||||
|
||||
@ -3196,6 +3196,7 @@ int RAWToRGB24(const uint8_t* src_raw,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Consider uint8_t value
|
||||
LIBYUV_API
|
||||
void SetPlane(uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
@ -3256,7 +3257,7 @@ void SetPlane(uint8_t* dst_y,
|
||||
|
||||
// Set plane
|
||||
for (y = 0; y < height; ++y) {
|
||||
SetRow(dst_y, value, width);
|
||||
SetRow(dst_y, (uint8_t)value, width);
|
||||
dst_y += dst_stride_y;
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,13 +201,13 @@ void TransposeWx8_NEON(const uint8_t* src,
|
||||
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"(&kVTbl4x4Transpose), // %4
|
||||
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"(&kVTbl4x4Transpose), // %4
|
||||
"r"((ptrdiff_t)src_stride), // %5
|
||||
"r"((ptrdiff_t)dst_stride) // %6
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
}
|
||||
@ -423,15 +423,15 @@ void TransposeUVWx8_NEON(const uint8_t* src,
|
||||
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst_a), // %2
|
||||
"+r"(dst_b), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
|
||||
"r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst_a), // %2
|
||||
"+r"(dst_b), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"((ptrdiff_t)src_stride), // %5
|
||||
"r"((ptrdiff_t)dst_stride_a), // %6
|
||||
"r"((ptrdiff_t)dst_stride_b), // %7
|
||||
"r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
|
||||
}
|
||||
|
||||
@ -2801,6 +2801,33 @@ void DetileToYUY2_C(const uint8_t* src_y,
|
||||
}
|
||||
}
|
||||
|
||||
// Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
// in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
// block contain all of the lower 2 bits of each pixel packed together, and the
// next 64 bytes represent all the upper 8 bits of the pixel.
//
// src:  packed input, read byte-wise.
// dst:  receives 64 16-bit samples for every 80-byte block consumed.
// size: number of input bytes. Blocks are consumed while the running offset
//       is below size, so size is expected to be a multiple of 80; a
//       remainder still causes one more full block to be read and written.
//
// Each output sample holds the 8 upper bits in bits 15..8, the 2 lower bits
// in bits 7..6, and a copy of the top 6 bits of the upper byte in bits 5..0.
void UnpackMT2T_C(const uint16_t* src, uint16_t* dst, size_t size) {
  // Keep the const qualifier when viewing the input as bytes.
  const uint8_t* block = (const uint8_t*)src;
  size_t offset;

  for (offset = 0; offset < size; offset += 80) {
    const uint8_t* lower_bits = block;       // 16 bytes, 4 pixels per byte.
    const uint8_t* upper_bits = block + 16;  // 64 bytes, 1 pixel per byte.
    int j;

    for (j = 0; j < 16; ++j) {
      const uint8_t packed = lower_bits[j];
      int k;

      // Pixel 4*j+k takes bits (2k+1..2k) of the packed byte as its low bits.
      for (k = 0; k < 4; ++k) {
        const uint16_t upper = upper_bits[j * 4 + k];
        const uint16_t lower = (uint16_t)((packed >> (2 * k)) & 0x03);
        *dst++ = (uint16_t)((upper << 8) | (lower << 6) | (upper >> 2));
      }
    }

    block += 80;
  }
}
|
||||
|
||||
void SplitRGBRow_C(const uint8_t* src_rgb,
|
||||
uint8_t* dst_r,
|
||||
uint8_t* dst_g,
|
||||
|
||||
@ -720,6 +720,60 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
|
||||
const uint16_t* src_lower_bits = src;
|
||||
const uint16_t* src_upper_bits = src + 8;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Load 32 bytes of upper
|
||||
// bits.
|
||||
"vld1.8 {d6}, [%0]! \n" // Load 8 bytes of lower
|
||||
// bits.
|
||||
"vshl.u8 d4, d6, #2 \n" // Align lower bits.
|
||||
"vshl.u8 d2, d6, #4 \n"
|
||||
"vshl.u8 d0, d6, #6 \n"
|
||||
"vzip.u8 d0, d1 \n" // Zip lower and upper
|
||||
// bits together.
|
||||
"vzip.u8 d2, d3 \n"
|
||||
"vzip.u8 d4, d5 \n"
|
||||
"vzip.u8 d6, d7 \n"
|
||||
"vsri.u16 q0, q0, #10 \n" // Copy upper 6 bits into
|
||||
// lower 6 bits for better
|
||||
// accuracy in
|
||||
// conversions.
|
||||
"vsri.u16 q1, q1, #10 \n"
|
||||
"vsri.u16 q2, q2, #10 \n"
|
||||
"vsri.u16 q3, q3, #10 \n"
|
||||
"vst4.16 {d0, d2, d4, d6}, [%2]! \n" // Store 32 pixels
|
||||
"vst4.16 {d1, d3, d5, d7}, [%2]! \n"
|
||||
"vld4.8 {d1, d3, d5, d7}, [%1]! \n" // Process last 32 pixels
|
||||
// in the block
|
||||
"vld1.8 {d6}, [%0]! \n"
|
||||
"vshl.u8 d4, d6, #2 \n"
|
||||
"vshl.u8 d2, d6, #4 \n"
|
||||
"vshl.u8 d0, d6, #6 \n"
|
||||
"vzip.u8 d0, d1 \n"
|
||||
"vzip.u8 d2, d3 \n"
|
||||
"vzip.u8 d4, d5 \n"
|
||||
"vzip.u8 d6, d7 \n"
|
||||
"vsri.u16 q0, q0, #10 \n"
|
||||
"vsri.u16 q1, q1, #10 \n"
|
||||
"vsri.u16 q2, q2, #10 \n"
|
||||
"vsri.u16 q3, q3, #10 \n"
|
||||
"vst4.16 {d0, d2, d4, d6}, [%2]! \n"
|
||||
"vst4.16 {d1, d3, d5, d7}, [%2]! \n"
|
||||
"mov %0, %1 \n"
|
||||
"add %1, %0, #16 \n"
|
||||
"subs %3, %3, #80 \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_lower_bits), // %0
|
||||
"+r"(src_upper_bits), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(size) // %3
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||
void MergeUVRow_NEON(const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
|
||||
@ -749,6 +749,54 @@ void DetileToYUY2_NEON(const uint8_t* src_y,
|
||||
}
|
||||
#endif
|
||||
|
||||
// Unpack MT2T into tiled P010 64 pixels at a time. See
|
||||
// tinyurl.com/mtk-10bit-video-format for format documentation.
|
||||
void UnpackMT2T_NEON(const uint16_t* src, uint16_t* dst, size_t size) {
|
||||
const uint16_t* src_lower_bits = src;
|
||||
const uint16_t* src_upper_bits = src + 8;
|
||||
asm volatile(
|
||||
"1: \n"
|
||||
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
|
||||
"ld1 {v7.8b}, [%0], #8 \n"
|
||||
"shl v6.8b, v7.8b, #2 \n"
|
||||
"shl v5.8b, v7.8b, #4 \n"
|
||||
"shl v4.8b, v7.8b, #6 \n"
|
||||
"zip1 v0.16b, v4.16b, v0.16b \n"
|
||||
"zip1 v1.16b, v5.16b, v1.16b \n"
|
||||
"zip1 v2.16b, v6.16b, v2.16b \n"
|
||||
"zip1 v3.16b, v7.16b, v3.16b \n"
|
||||
"sri v0.8h, v0.8h, #10 \n"
|
||||
"sri v1.8h, v1.8h, #10 \n"
|
||||
"sri v2.8h, v2.8h, #10 \n"
|
||||
"sri v3.8h, v3.8h, #10 \n"
|
||||
"st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
|
||||
"ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [%1], #32 \n"
|
||||
"ld1 {v7.8b}, [%0], #8 \n"
|
||||
"shl v6.8b, v7.8b, #2 \n"
|
||||
"shl v5.8b, v7.8b, #4 \n"
|
||||
"shl v4.8b, v7.8b, #6 \n"
|
||||
"zip1 v0.16b, v4.16b, v0.16b \n"
|
||||
"zip1 v1.16b, v5.16b, v1.16b \n"
|
||||
"zip1 v2.16b, v6.16b, v2.16b \n"
|
||||
"zip1 v3.16b, v7.16b, v3.16b \n"
|
||||
"sri v0.8h, v0.8h, #10 \n"
|
||||
"sri v1.8h, v1.8h, #10 \n"
|
||||
"sri v2.8h, v2.8h, #10 \n"
|
||||
"sri v3.8h, v3.8h, #10 \n"
|
||||
"st4 {v0.8h, v1.8h, v2.8h, v3.8h}, [%2], #64 \n"
|
||||
"mov %0, %1 \n"
|
||||
"add %1, %0, #16 \n"
|
||||
"subs %3, %3, #80 \n"
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src_lower_bits), // %0
|
||||
"+r"(src_upper_bits), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(size) // %3
|
||||
:
|
||||
: "cc", "memory", "w0", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12");
|
||||
}
|
||||
|
||||
#if LIBYUV_USE_ST2
|
||||
// Reads 16 U's and V's and writes out 16 pairs of UV.
|
||||
void MergeUVRow_NEON(const uint8_t* src_u,
|
||||
|
||||
@ -538,6 +538,26 @@ TESTBIPLANARTOBP(P016, uint16_t, 2, 2, 2, P416, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBIPLANARTOBP(P216, uint16_t, 2, 2, 1, P416, uint16_t, 2, 1, 1, 12, 1, 1)
|
||||
TESTBIPLANARTOBP(MM21, uint8_t, 1, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 16, 32)
|
||||
|
||||
// TODO (greenjustin): Test all variants.
|
||||
TESTBIPLANARTOBPI(MT2T,
|
||||
uint16_t,
|
||||
2,
|
||||
2,
|
||||
2,
|
||||
P010,
|
||||
uint16_t,
|
||||
2,
|
||||
2,
|
||||
2,
|
||||
benchmark_width_,
|
||||
_Opt,
|
||||
+,
|
||||
0,
|
||||
1,
|
||||
10,
|
||||
16,
|
||||
32)
|
||||
|
||||
#define TESTBIPLANARTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
|
||||
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, \
|
||||
DST_SUBSAMP_X, DST_SUBSAMP_Y, W1280, N, NEG, OFF, \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user