mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
I010ToNV12 conversion using 2 step row function for UV
- convert full Y plane with row coalescing if possible
- convert rows of UV from 10 bit to 8 bit then call MergeUV

libyuv_test '--gunit_filter=*010ToNV12_Opt' --libyuv_width=3840 --libyuv_height=2160 --libyuv_repeat=1000 --libyuv_flags=-1 --libyuv_cpu_info=-1
Note: Google Test filter = *010ToNV12_Opt

Skylake Xeon
Was 2 pass planes
[ OK ] LibYUVConvertTest.I010ToNV12_Opt (4512 ms)
Now 2 pass rows
[ OK ] LibYUVConvertTest.I010ToNV12_Opt (2400 ms)
[ OK ] LibYUVConvertTest.P010ToNV12_Opt (2265 ms)

On Samsung S23
libyuv_test '--gunit_filter=*.????ToNV12_Opt' --libyuv_width=3840 --libyuv_height=2160 --libyuv_repeat=1000
Was
[ OK ] LibYUVConvertTest.I010ToNV12_Opt (3563 ms)
Now
[ OK ] LibYUVConvertTest.AYUVToNV12_Opt (3068 ms)
[ OK ] LibYUVConvertTest.ARGBToNV12_Opt (2990 ms)
[ OK ] LibYUVConvertTest.ABGRToNV12_Opt (2904 ms)
[ OK ] LibYUVConvertTest.P010ToNV12_Opt (1177 ms)
[ OK ] LibYUVConvertTest.I010ToNV12_Opt (1150 ms) <- now
[ OK ] LibYUVConvertTest.I444ToNV12_Opt (1118 ms)
[ OK ] LibYUVConvertTest.MM21ToNV12_Opt (1008 ms)
[ OK ] LibYUVConvertTest.UYVYToNV12_Opt (1007 ms)
[ OK ] LibYUVConvertTest.YUY2ToNV12_Opt (938 ms)
[ OK ] LibYUVConvertTest.NV21ToNV12_Opt (496 ms)
[ OK ] LibYUVConvertTest.I420ToNV12_Opt (466 ms)

Bug: b/357439226, b/357721018
Change-Id: I48405929ae835b171e7d556a16794eac22c50ae9
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/5782404
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
5dfa75670d
commit
336e6fd25b
@ -11,7 +11,6 @@
|
||||
#include "libyuv/convert.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/convert_from.h" // For I420ToNV12()
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/rotate.h"
|
||||
@ -659,10 +658,22 @@ int I010ToNV12(const uint16_t* src_y,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height) {
|
||||
int r;
|
||||
int y;
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
const int scale = 16385; // 16384 for 10 bits
|
||||
void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
|
||||
int width) = Convert16To8Row_C;
|
||||
void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
|
||||
uint8_t* dst_uv, int width) = MergeUVRow_C;
|
||||
if ((!src_y && dst_y) || !src_u || !src_v || !dst_uv || width <= 0 ||
|
||||
height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
halfheight = (height + 1) >> 1;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (halfheight - 1) * src_stride_u;
|
||||
src_v = src_v + (halfheight - 1) * src_stride_v;
|
||||
@ -670,34 +681,109 @@ int I010ToNV12(const uint16_t* src_y,
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
|
||||
// Allocate temporary buffers for 3 planes in 8 bit.
|
||||
{
|
||||
align_buffer_64(temp_y, width * height);
|
||||
align_buffer_64(temp_u, ((width + 1) / 2) * ((height + 1) / 2));
|
||||
align_buffer_64(temp_v, ((width + 1) / 2) * ((height + 1) / 2));
|
||||
|
||||
int temp_stride_y = width;
|
||||
int temp_stride_u = (width + 1) / 2;
|
||||
int temp_stride_v = (width + 1) / 2;
|
||||
|
||||
// The first step is to convert 10 bit YUV I010 to 8 bit I420 with 3 planes.
|
||||
r = I010ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
||||
temp_y, temp_stride_y, temp_u, temp_stride_u, temp_v,
|
||||
temp_stride_v, width, height);
|
||||
if (!r) {
|
||||
// The second step is to convert 8 bit I420 with 3 planes to 8 bit NV12 with 2 planes.
|
||||
r = I420ToNV12(temp_y, temp_stride_y, temp_u, temp_stride_u, temp_v,
|
||||
temp_stride_v, dst_y, dst_stride_y, dst_uv, dst_stride_uv,
|
||||
width, height);
|
||||
#if defined(HAS_CONVERT16TO8ROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
Convert16To8Row = Convert16To8Row_NEON;
|
||||
}
|
||||
|
||||
// Free temporary buffers.
|
||||
free_aligned_buffer_64(temp_y);
|
||||
free_aligned_buffer_64(temp_u);
|
||||
free_aligned_buffer_64(temp_v);
|
||||
}
|
||||
return r;
|
||||
#endif
|
||||
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
Convert16To8Row = Convert16To8Row_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_CONVERT16TO8ROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
Convert16To8Row = Convert16To8Row_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
Convert16To8Row = Convert16To8Row_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_MERGEUVROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
MergeUVRow = MergeUVRow_Any_SSE2;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow = MergeUVRow_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
MergeUVRow = MergeUVRow_Any_AVX2;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow = MergeUVRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_AVX512BW)
|
||||
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||
MergeUVRow = MergeUVRow_Any_AVX512BW;
|
||||
if (IS_ALIGNED(halfwidth, 32)) {
|
||||
MergeUVRow = MergeUVRow_AVX512BW;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
MergeUVRow = MergeUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow = MergeUVRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
MergeUVRow = MergeUVRow_Any_MSA;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow = MergeUVRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_LSX)
|
||||
if (TestCpuFlag(kCpuHasLSX)) {
|
||||
MergeUVRow = MergeUVRow_Any_LSX;
|
||||
if (IS_ALIGNED(halfwidth, 16)) {
|
||||
MergeUVRow = MergeUVRow_LSX;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MERGEUVROW_RVV)
|
||||
if (TestCpuFlag(kCpuHasRVV)) {
|
||||
MergeUVRow = MergeUVRow_RVV;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Convert Y plane.
|
||||
if (dst_y) {
|
||||
Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, scale, width,
|
||||
height);
|
||||
}
|
||||
|
||||
{
|
||||
// Allocate a row of uv.
|
||||
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
|
||||
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
|
||||
if (!row_u)
|
||||
return 1;
|
||||
|
||||
for (y = 0; y < halfheight; ++y) {
|
||||
Convert16To8Row(src_u, row_u, scale, halfwidth);
|
||||
Convert16To8Row(src_v, row_v, scale, halfwidth);
|
||||
MergeUVRow(row_u, row_v, dst_uv, halfwidth);
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
dst_uv += dst_stride_uv;
|
||||
}
|
||||
free_aligned_buffer_64(row_u);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user