From 42d76a342f9f0775d5f5fd47f7ef1a9ba6444074 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 1 Mar 2022 04:30:20 -0800 Subject: [PATCH] RAWToJNV21 function with 2 step conversion RAWToJ420 + J420ToNV21 on row level Pixel 6 RAWToJNV21_Opt (320 ms) Skylake Xeon RAWToJNV21_Opt (302 ms) Bug: b/220171611 Change-Id: I39dcce9cf56c576b95666bb4fb1baccf9fbc7f7a Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3495876 Reviewed-by: richard winterton Commit-Queue: Frank Barchard --- README.chromium | 2 +- docs/getting_started.md | 8 +- include/libyuv/convert_from_argb.h | 11 ++ include/libyuv/version.h | 2 +- source/convert.cc | 3 +- source/convert_from.cc | 3 +- source/convert_from_argb.cc | 203 +++++++++++++++++++++++++++++ unit_test/convert_test.cc | 1 + 8 files changed, 223 insertions(+), 10 deletions(-) diff --git a/README.chromium b/README.chromium index b0ed0b964..8e9fa4746 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 1811 +Version: 1812 License: BSD License File: LICENSE diff --git a/docs/getting_started.md b/docs/getting_started.md index ffe0369e7..15b19ab21 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -27,12 +27,12 @@ Then you'll get a .gclient file like: }, ]; -For iOS add `target_os=['ios'];` to your OSX .gclient and run `gclient sync.` +For iOS add `;target_os=['ios'];` to your OSX .gclient and run `gclient sync.` -Browse the Git reprository: https://chromium.googlesource.com/libyuv/libyuv/+/main +Browse the Git reprository: https://chromium.googlesource.com/libyuv/libyuv/+/master ### Android -For Android add `target_os=['android'];` to your Linux .gclient +For Android add `;target_os=['android'];` to your Linux .gclient solutions = [ { "name" : "src", @@ -43,7 +43,7 @@ For Android add `target_os=['android'];` to your Linux .gclient }, "safesync_url": "", }, - ] + ]; target_os = ["android", "linux"]; Then run: diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index bf4878604..2a488838a 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -327,6 +327,17 @@ int ARGBToUYVY(const uint8_t* src_argb, int width, int height); +// RAW to JNV21 full range NV21 +LIBYUV_API +int RAWToJNV21(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height); + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/include/libyuv/version.h b/include/libyuv/version.h index fb6831cf3..dcdc8d94f 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1811 +#define LIBYUV_VERSION 1812 #endif // INCLUDE_LIBYUV_VERSION_H_ \ No newline at end of file diff --git a/source/convert.cc b/source/convert.cc index 9c5e8aa8f..875afb30c 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -801,8 +801,7 @@ int I444ToNV12(const uint8_t* src_y, int dst_stride_uv, int width, int height) { - if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 || - height == 0) { + if (!src_y || !src_u || !src_v || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. diff --git a/source/convert_from.cc b/source/convert_from.cc index 85b87bdbd..d331ed2d7 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -645,8 +645,7 @@ int I420ToNV12(const uint8_t* src_y, int height) { int halfwidth = (width + 1) / 2; int halfheight = (height + 1) / 2; - if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 || - height == 0) { + if (!src_y || !src_u || !src_v || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 519674d37..d8b9f7bbe 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -2316,6 +2316,209 @@ int RGBAToJ400(const uint8_t* src_rgba, return 0; } +// Enabled if 1 pass is available +#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA) +#define HAS_RAWTOYJROW +#endif + +// RAW to JNV21 full range NV21 +LIBYUV_API +int RAWToJNV21(const uint8_t* src_raw, + int src_stride_raw, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + int y; + int halfwidth = (width + 1) >> 1; +#if defined(HAS_RAWTOYJROW) + void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RAWToUVJRow_C; + void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = + RAWToYJRow_C; +#else + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; + void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ARGBToUVJRow_C; + void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = + ARGBToYJRow_C; +#endif + void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_vu, int width) = MergeUVRow_C; + if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_raw = src_raw + (height - 1) * src_stride_raw; + src_stride_raw = -src_stride_raw; + } + +#if defined(HAS_RAWTOYJROW) + +// Neon version does direct RAW to YUV. +#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RAWToUVJRow = RAWToUVJRow_Any_NEON; + RAWToYJRow = RAWToYJRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToYJRow = RAWToYJRow_NEON; + if (IS_ALIGNED(width, 16)) { + RAWToUVJRow = RAWToUVJRow_NEON; + } + } + } +#endif +#if defined(HAS_RAWTOYJROW_MSA) && defined(HAS_RAWTOUVJROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + RAWToUVJRow = RAWToUVJRow_Any_MSA; + RAWToYJRow = RAWToYJRow_Any_MSA; + if (IS_ALIGNED(width, 16)) { + RAWToYJRow = RAWToYJRow_MSA; + RAWToUVJRow = RAWToUVJRow_MSA; + } + } +#endif + +// Other platforms do intermediate conversion from RAW to ARGB. +#else // HAS_RAWTOYJROW + +#if defined(HAS_RAWTOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYJRow = ARGBToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYJRow = ARGBToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYJRow = ARGBToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVJRow = ARGBToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVJRow = ARGBToUVJRow_AVX2; + } + } +#endif +#endif // HAS_RAWTOYJROW +#if defined(HAS_MERGEUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeUVRow_ = MergeUVRow_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow_ = MergeUVRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow_ = MergeUVRow_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow_ = MergeUVRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow_ = MergeUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow_ = MergeUVRow_NEON; + } + } +#endif +#if defined(HAS_MERGEUVROW_MSA) + if (TestCpuFlag(kCpuHasMSA)) { + MergeUVRow_ = MergeUVRow_Any_MSA; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow_ = MergeUVRow_MSA; + } + } +#endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow_ = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow_ = MergeUVRow_LSX; + } + } +#endif + { + // Allocate a row of uv. + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); +#if !defined(HAS_RAWTOYJROW) + // Allocate 2 rows of ARGB. + const int kRowSize = (width * 4 + 31) & ~31; + align_buffer_64(row, kRowSize * 2); +#endif + + for (y = 0; y < height - 1; y += 2) { +#if defined(HAS_RAWTOYJROW) + RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToYJRow(src_raw, dst_y, width); + RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); +#else + RAWToARGBRow(src_raw, row, width); + RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); + ARGBToUVJRow(row, kRowSize, row_u, row_v, width); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + ARGBToYJRow(row, dst_y, width); + ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width); +#endif + src_raw += src_stride_raw * 2; + dst_y += dst_stride_y * 2; + dst_vu += dst_stride_vu; + } + if (height & 1) { +#if defined(HAS_RAWTOYJROW) + RAWToUVJRow(src_raw, 0, row_u, row_v, width); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + RAWToYJRow(src_raw, dst_y, width); +#else + RAWToARGBRow(src_raw, row, width); + ARGBToUVJRow(row, 0, row_u, row_v, width); + MergeUVRow_(row_v, row_u, dst_vu, halfwidth); + ARGBToYJRow(row, dst_y, width); +#endif + } +#if !defined(HAS_RAWTOYJROW) + free_aligned_buffer_64(row); +#endif + free_aligned_buffer_64(row_u); + } + return 0; +} +#undef HAS_RAWTOYJROW + #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index df3dce197..2701ac166 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -1292,6 +1292,7 @@ TESTATOBIPLANAR(ARGB, 1, 4, NV12, 2, 2) TESTATOBIPLANAR(ARGB, 1, 4, NV21, 2, 2) TESTATOBIPLANAR(ABGR, 1, 4, NV12, 2, 2) TESTATOBIPLANAR(ABGR, 1, 4, NV21, 2, 2) +TESTATOBIPLANAR(RAW, 1, 3, JNV21, 2, 2) TESTATOBIPLANAR(YUY2, 2, 4, NV12, 2, 2) TESTATOBIPLANAR(UYVY, 2, 4, NV12, 2, 2) TESTATOBIPLANAR(AYUV, 1, 4, NV12, 2, 2)