MM21ToYUY2 and ABGRToJ420 conversions

MM21 to YUY2 uses zip1 (vzip.8 on 32-bit ARM) for performance.

Cortex A510
Was MM21ToYUY2 (612 ms)
Now MM21ToYUY2 (573 ms)

Prefetches help Cortex A53
Was MM21ToYUY2 (4998 ms)
Now MM21ToYUY2 (1900 ms)

Pixel 4 Cortex A76
Was MM21ToYUY2 (215 ms)
Now MM21ToYUY2 (173 ms)
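
For reference, a minimal sketch of the zip-based interleave using AArch64 NEON
intrinsics rather than the commit's inline asm (the helper name is
hypothetical; one row of 16 Y bytes and 8 UV pairs becomes 16 YUY2 pixels):

  #include <arm_neon.h>
  #include <stdint.h>

  // Interleave 16 luma bytes with 16 chroma bytes (U0 V0 U1 V1 ...)
  // into 32 bytes of YUY2: Y0 U0 Y1 V0 Y2 U1 Y3 V1 ...
  static void DetileToYUY2_Sketch16(const uint8_t* src_y,
                                    const uint8_t* src_uv,
                                    uint8_t* dst_yuy2) {
    uint8x16_t y = vld1q_u8(src_y);
    uint8x16_t uv = vld1q_u8(src_uv);
    vst1q_u8(dst_yuy2, vzip1q_u8(y, uv));       // low halves interleaved
    vst1q_u8(dst_yuy2 + 16, vzip2q_u8(y, uv));  // high halves interleaved
  }

The Cortex-A53 gain comes from the pld/prfm prefetches added alongside the
loads in the row functions below.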

ABGRToJ420
- NEON, SSSE3 and AVX2 row functions
- J400, J420 and J422 formats
- Added AVX2 for UV on ARGBToJ420 (previously SSSE3)

Same code and performance as ARGBToJ420, but with the conversion constants re-ordered.
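The re-order only swaps the B and R byte lanes of the full-range BT.601
weights; a sketch of the idea (weight values match the kARGBToYJ and
kABGRToYJ constants in row_gcc.cc below; the _sketch names are not in the
source):

  // Memory byte order: ARGB stores B,G,R,A; ABGR stores R,G,B,A.
  // YJ ~= (77*R + 150*G + 29*B) >> 8, so only the B/R lanes move:
  static const uint8_t kARGBToYJ_sketch[4] = {29, 150, 77, 0};  // B,G,R,A
  static const uint8_t kABGRToYJ_sketch[4] = {77, 150, 29, 0};  // R,G,B,A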
Pixel 4
ABGRToJ420_Opt (623 ms)
ABGRToJ422_Opt (702 ms)
ABGRToJ400_Opt (238 ms)

Skylake Xeon
With LIBYUV_BIT_EXACT, which uses C code for UV:
ABGRToJ420_Opt (988 ms)
ABGRToJ422_Opt (1872 ms)
ABGRToJ400_Opt (186 ms)
Skylake Xeon using AVX2
ABGRToJ420_Opt (251 ms)
ABGRToJ422_Opt (245 ms)
ABGRToJ400_Opt (184 ms)
Skylake Xeon using SSSE3
ABGRToJ420_Opt (328 ms)
ABGRToJ422_Opt (362 ms)
ABGRToJ400_Opt (185 ms)
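
A minimal call sketch for the new entry point (frame size and buffers are
hypothetical; the signature matches the declaration added to
convert_from_argb.h below):

  #include <stdint.h>
  #include "libyuv/convert_from_argb.h"

  // 640x360 ABGR in, full-range J420 planes out; returns 0 on success.
  int ConvertFrame(const uint8_t* abgr,
                   uint8_t* yj, uint8_t* uj, uint8_t* vj) {
    const int w = 640, h = 360;
    return ABGRToJ420(abgr, w * 4,      // src pixels, stride in bytes
                      yj, w,            // full-resolution luma plane
                      uj, (w + 1) / 2,  // half-resolution chroma planes
                      vj, (w + 1) / 2,
                      w, h);
  }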

Bug: b/238137982
Change-Id: I559c3fe3fb80fa2ce5be3d8218736f9cbc627666
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/3832111
Reviewed-by: Justin Green <greenjustin@google.com>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Author: Frank Barchard <fbarchard@chromium.org>, 2022-08-16 10:22:05 -07:00
Committed-by: Frank Barchard
Parent: 1c5a8bb17a
Commit: 65e7c9d570
16 changed files with 1255 additions and 234 deletions

README.chromium

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1839
Version: 1840
License: BSD
License File: LICENSE

include/libyuv/convert.h

@ -151,6 +151,17 @@ int MM21ToI420(const uint8_t* src_y,
int width,
int height);
// Convert MM21 to YUY2
LIBYUV_API
int MM21ToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height);
// Convert I422 to NV21.
LIBYUV_API
int I422ToNV21(const uint8_t* src_y,

include/libyuv/convert_from_argb.h

@ -209,10 +209,10 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
@ -222,10 +222,10 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
@ -238,6 +238,41 @@ int ARGBToJ400(const uint8_t* src_argb,
int width,
int height);
// Convert ABGR to J420. (JPeg full range I420).
LIBYUV_API
int ABGRToJ420(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ABGR to J422.
LIBYUV_API
int ABGRToJ422(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height);
// Convert ABGR to J400. (JPeg full range).
LIBYUV_API
int ABGRToJ400(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height);
// Convert RGBA to J400. (JPeg full range).
LIBYUV_API
int RGBAToJ400(const uint8_t* src_rgba,

include/libyuv/planar_functions.h

@ -105,6 +105,17 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
int height,
int tile_height);
// Convert a Y and UV plane of tiles into interlaced YUY2.
void DetileToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height,
int tile_height);
// Split interleaved UV plane into separate U and V planes.
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,

include/libyuv/row.h

@ -282,12 +282,14 @@ extern "C" {
// The following are available for gcc/clang x86 platforms:
// TODO(fbarchard): Port to Visual C
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_AB64TOARGBROW_SSSE3
#define HAS_ABGRTOAR30ROW_SSSE3
#define HAS_ABGRTOUVJROW_SSSE3
#define HAS_ABGRTOYJROW_SSSE3
#define HAS_AR64TOARGBROW_SSSE3
#define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_ARGBTOAR64ROW_SSSE3
#define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_AR64TOARGBROW_SSSE3
#define HAS_AB64TOARGBROW_SSSE3
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
#define HAS_DETILEROW_SSE2
@ -298,12 +300,12 @@ extern "C" {
#define HAS_I212TOAR30ROW_SSSE3
#define HAS_I212TOARGBROW_SSSE3
#define HAS_I400TOARGBROW_SSE2
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_I410TOAR30ROW_SSSE3
#define HAS_I410TOARGBROW_SSSE3
#define HAS_I422TOAR30ROW_SSSE3
#define HAS_MERGEARGBROW_SSE2
#define HAS_MERGEXRGBROW_SSE2
#define HAS_MERGERGBROW_SSSE3
#define HAS_MERGEXRGBROW_SSE2
#define HAS_MIRRORUVROW_SSSE3
#define HAS_NV21TOYUV24ROW_SSSE3
#define HAS_P210TOAR30ROW_SSSE3
@ -340,26 +342,19 @@ extern "C" {
#define HAS_ABGRTOUVROW_AVX2
#define HAS_ABGRTOYROW_AVX2
#endif
#define HAS_AB64TOARGBROW_AVX2
#define HAS_ABGRTOUVJROW_AVX2
#define HAS_ABGRTOYJROW_AVX2
#define HAS_AR64TOARGBROW_AVX2
#define HAS_ARGBTOAB64ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
#define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTOAB64ROW_AVX2
#define HAS_AR64TOARGBROW_AVX2
#define HAS_AB64TOARGBROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
#define HAS_INTERPOLATEROW_16TO8_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
#define HAS_DIVIDEROW_16_AVX2
#define HAS_HALFMERGEUVROW_AVX2
#define HAS_MERGEAR64ROW_AVX2
#define HAS_MERGEARGB16TO8ROW_AVX2
#define HAS_MERGEARGBROW_AVX2
#define HAS_MERGEXR30ROW_AVX2
#define HAS_MERGEXR64ROW_AVX2
#define HAS_MERGEXRGB16TO8ROW_AVX2
#define HAS_MERGEXRGBROW_AVX2
#define HAS_NV21TOYUV24ROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
#define HAS_I210TOARGBROW_AVX2
#define HAS_I212TOAR30ROW_AVX2
@ -367,16 +362,25 @@ extern "C" {
#define HAS_I400TOARGBROW_AVX2
#define HAS_I410TOAR30ROW_AVX2
#define HAS_I410TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
#define HAS_INTERPOLATEROW_16TO8_AVX2
#define HAS_MERGEAR64ROW_AVX2
#define HAS_MERGEARGB16TO8ROW_AVX2
#define HAS_MERGEARGBROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MERGEXR30ROW_AVX2
#define HAS_MERGEXR64ROW_AVX2
#define HAS_MERGEXRGB16TO8ROW_AVX2
#define HAS_MERGEXRGBROW_AVX2
#define HAS_MIRRORUVROW_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#define HAS_NV21TOYUV24ROW_AVX2
#define HAS_P210TOAR30ROW_AVX2
#define HAS_P210TOARGBROW_AVX2
#define HAS_P410TOAR30ROW_AVX2
#define HAS_P410TOARGBROW_AVX2
#define HAS_I422TOAR30ROW_AVX2
#define HAS_I422TOUYVYROW_AVX2
#define HAS_I422TOYUY2ROW_AVX2
#define HAS_MERGEUVROW_16_AVX2
#define HAS_MIRRORUVROW_AVX2
#define HAS_MULTIPLYROW_16_AVX2
#if !defined(LIBYUV_BIT_EXACT)
#define HAS_RGBATOYJROW_AVX2
#endif
@ -433,8 +437,10 @@ extern "C" {
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ABGRTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ABGRTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_AYUVTOUVROW_NEON
#define HAS_AYUVTOVUROW_NEON
@ -446,6 +452,7 @@ extern "C" {
#define HAS_COPYROW_NEON
#define HAS_DETILEROW_NEON
#define HAS_DETILESPLITUVROW_NEON
#define HAS_DETILETOYUY2_NEON
#define HAS_DIVIDEROW_16_NEON
#define HAS_HALFFLOATROW_NEON
#define HAS_HALFMERGEUVROW_NEON
@ -574,6 +581,7 @@ extern "C" {
#define HAS_ARGBTORGB565ROW_MSA
#define HAS_ARGBTOUV444ROW_MSA
#define HAS_ARGBTOUVJROW_MSA
#define HAS_ABGRTOUVJROW_MSA
#define HAS_ARGBTOUVROW_MSA
#define HAS_ARGBTOYJROW_MSA
#define HAS_ARGBTOYROW_MSA
@ -1148,9 +1156,13 @@ void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width);
void ABGRToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void RGBAToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);
@ -1164,8 +1176,9 @@ void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width);
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width);
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);
void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width);
void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width);
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width);
void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);
void ARGBToYRow_LASX(const uint8_t* src_argb0, uint8_t* dst_y, int width);
@ -1203,6 +1216,11 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_uj,
uint8_t* dst_vj,
int width);
void BGRAToUVRow_NEON(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@ -1258,6 +1276,11 @@ void ARGBToUVJRow_MSA(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_MSA(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
@ -1396,6 +1419,7 @@ void RAWToYRow_LASX(const uint8_t* src_raw, uint8_t* dst_y, int width);
void ARGBToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ARGBToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ABGRToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void RGBAToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void BGRAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
void ABGRToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width);
@ -1409,6 +1433,7 @@ void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);
void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@ -1423,6 +1448,7 @@ void RGB24ToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RAWToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void RGBAToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@ -1485,6 +1511,11 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -1495,6 +1526,11 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_u,
@ -1525,6 +1561,11 @@ void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_Any_AVX2(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@ -1535,6 +1576,11 @@ void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@ -1582,6 +1628,11 @@ void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ABGRToUVJRow_Any_NEON(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,
int src_stride,
uint8_t* dst_u,
@ -1747,12 +1798,12 @@ void ARGBToUVJRow_C(const uint8_t* src_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVRow_C(const uint8_t* src_rgb,
void ABGRToUVJRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void ARGBToUVJRow_C(const uint8_t* src_rgb,
void ARGBToUVRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
@ -1991,6 +2042,24 @@ void DetileSplitUVRow_Any_NEON(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
int width);
void DetileToYUY2_C(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void DetileToYUY2_Any_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width);
void MergeUVRow_C(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
@ -2604,8 +2673,8 @@ void Convert16To8Row_NEON(const uint16_t* src_y,
uint8_t* dst_y,
int scale,
int width);
void Convert16To8Row_Any_NEON(const uint16_t* src_y,
uint8_t* dst_y,
void Convert16To8Row_Any_NEON(const uint16_t* src_ptr,
uint8_t* dst_ptr,
int scale,
int width);
@ -3823,13 +3892,13 @@ void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
uint8_t* dst_ptr,
const struct YuvConstants* yuvconstants,
int width);
void NV21ToYUV24Row_Any_SSSE3(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
void NV21ToYUV24Row_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void NV21ToYUV24Row_Any_AVX2(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_yuv24,
void NV21ToYUV24Row_Any_AVX2(const uint8_t* y_buf,
const uint8_t* uv_buf,
uint8_t* dst_ptr,
int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
const uint8_t* uv_buf,

include/libyuv/version.h

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1839
#define LIBYUV_VERSION 1840
#endif // INCLUDE_LIBYUV_VERSION_H_

source/convert.cc

@ -262,10 +262,10 @@ int I210ToI420(const uint16_t* src_y,
height);
ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_u,
dst_stride_u, src_u, dst_u, 0, 32768, dy,
/*bpp=*/1, scale, kFilterBilinear);
/*wpp=*/1, scale, kFilterBilinear);
ScalePlaneVertical_16To8(height, uv_width, uv_height, src_stride_v,
dst_stride_v, src_v, dst_v, 0, 32768, dy,
/*bpp=*/1, scale, kFilterBilinear);
/*wpp=*/1, scale, kFilterBilinear);
}
return 0;
}
@ -713,6 +713,25 @@ int MM21ToI420(const uint8_t* src_y,
return 0;
}
LIBYUV_API
int MM21ToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height) {
if (!src_y || !src_uv || !dst_yuy2 || width <= 0) {
return -1;
}
DetileToYUY2(src_y, src_stride_y, src_uv, src_stride_uv, dst_yuy2,
dst_stride_yuy2, width, height, 32);
return 0;
}
#ifdef I422TONV21_ROW_VERSION
// Unittest fails for this version.
// 422 chroma is 1/2 width, 1x height

source/convert_from_argb.cc

@ -1858,19 +1858,19 @@ int ARGBToJ420(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height) {
int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@ -1879,6 +1879,22 @@ int ARGBToJ420(const uint8_t* src_argb,
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
@ -1903,19 +1919,11 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_NEON;
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVJRow = ARGBToUVJRow_NEON;
#if defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
@ -1931,16 +1939,6 @@ int ARGBToJ420(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOYJROW_LSX) && defined(HAS_ARGBTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ARGBToYJRow = ARGBToYJRow_Any_LSX;
ARGBToUVJRow = ARGBToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ARGBToYJRow = ARGBToYJRow_LSX;
ARGBToUVJRow = ARGBToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_LASX) && defined(HAS_ARGBTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ARGBToYJRow = ARGBToYJRow_Any_LASX;
@ -1953,16 +1951,16 @@ int ARGBToJ420(const uint8_t* src_argb,
#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width);
ARGBToUVJRow(src_argb, src_stride_argb, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width);
src_argb += src_stride_argb * 2;
dst_yj += dst_stride_yj * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
dst_uj += dst_stride_uj;
dst_vj += dst_stride_vj;
}
if (height & 1) {
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
}
return 0;
@ -1974,19 +1972,19 @@ int ARGBToJ422(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height) {
int y;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) =
ARGBToYJRow_C;
if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) {
if (!src_argb || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
@ -1997,10 +1995,10 @@ int ARGBToJ422(const uint8_t* src_argb,
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_yj == width &&
dst_stride_u * 2 == width && dst_stride_v * 2 == width) {
dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) {
width *= height;
height = 1;
src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
src_stride_argb = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0;
}
#if defined(HAS_ARGBTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
@ -2026,6 +2024,14 @@ int ARGBToJ422(const uint8_t* src_argb,
}
}
#endif
#if defined(HAS_ARGBTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToUVJRow = ARGBToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ARGBToUVJRow = ARGBToUVJRow_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToYJRow = ARGBToYJRow_Any_NEON;
@ -2076,130 +2082,12 @@ int ARGBToJ422(const uint8_t* src_argb,
#endif
for (y = 0; y < height; ++y) {
ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
ARGBToUVJRow(src_argb, 0, dst_uj, dst_vj, width);
ARGBToYJRow(src_argb, dst_yj, width);
src_argb += src_stride_argb;
dst_yj += dst_stride_yj;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
return 0;
}
// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height) {
int y;
void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAR64Row_C;
if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ar64 = 0;
}
#if defined(HAS_ARGBTOAR64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAR64Row = ARGBToAR64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAR64Row(src_argb, dst_ar64, width);
src_argb += src_stride_argb;
dst_ar64 += dst_stride_ar64;
}
return 0;
}
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height) {
int y;
void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAB64Row_C;
if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ab64 = 0;
}
#if defined(HAS_ARGBTOAB64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAB64Row = ARGBToAB64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAB64Row(src_argb, dst_ab64, width);
src_argb += src_stride_argb;
dst_ab64 += dst_stride_ab64;
dst_uj += dst_stride_uj;
dst_vj += dst_stride_vj;
}
return 0;
}
@ -2336,6 +2224,428 @@ int RGBAToJ400(const uint8_t* src_rgba,
return 0;
}
// Convert ABGR to J420. (JPeg full range I420).
LIBYUV_API
int ABGRToJ420(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height) {
int y;
void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ABGRToUVJRow_C;
void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
ABGRToYJRow_C;
if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVJRow = ABGRToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVJRow = ABGRToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVJRow = ABGRToUVJRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVJRow = ABGRToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVJRow = ABGRToUVJRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYJRow = ABGRToYJRow_Any_MSA;
ABGRToUVJRow = ABGRToUVJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_MSA;
ABGRToUVJRow = ABGRToUVJRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
ABGRToUVJRow = ABGRToUVJRow_LSX;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width);
ABGRToYJRow(src_abgr, dst_yj, width);
ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width);
src_abgr += src_stride_abgr * 2;
dst_yj += dst_stride_yj * 2;
dst_uj += dst_stride_uj;
dst_vj += dst_stride_vj;
}
if (height & 1) {
ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width);
ABGRToYJRow(src_abgr, dst_yj, width);
}
return 0;
}
// Convert ABGR to J422. (JPeg full range I422).
LIBYUV_API
int ABGRToJ422(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
uint8_t* dst_uj,
int dst_stride_uj,
uint8_t* dst_vj,
int dst_stride_vj,
int width,
int height) {
int y;
void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr,
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ABGRToUVJRow_C;
void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
ABGRToYJRow_C;
if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
// Coalesce rows.
if (src_stride_abgr == width * 4 && dst_stride_yj == width &&
dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) {
width *= height;
height = 1;
src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0;
}
#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOUVJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToUVJRow = ABGRToUVJRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOUVJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToUVJRow = ABGRToUVJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToUVJRow = ABGRToUVJRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOUVJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToUVJRow = ABGRToUVJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToUVJRow = ABGRToUVJRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_MSA) && defined(HAS_ABGRTOUVJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYJRow = ABGRToYJRow_Any_MSA;
ABGRToUVJRow = ABGRToUVJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_MSA;
}
if (IS_ALIGNED(width, 32)) {
ABGRToUVJRow = ABGRToUVJRow_MSA;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LSX) && defined(HAS_ABGRTOUVJROW_LSX)
if (TestCpuFlag(kCpuHasLSX)) {
ABGRToYJRow = ABGRToYJRow_Any_LSX;
ABGRToUVJRow = ABGRToUVJRow_Any_LSX;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_LSX;
ABGRToUVJRow = ABGRToUVJRow_LSX;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_LASX) && defined(HAS_ABGRTOUVJROW_LASX)
if (TestCpuFlag(kCpuHasLASX)) {
ABGRToYJRow = ABGRToYJRow_Any_LASX;
ABGRToUVJRow = ABGRToUVJRow_Any_LASX;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_LASX;
ABGRToUVJRow = ABGRToUVJRow_LASX;
}
}
#endif
for (y = 0; y < height; ++y) {
ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width);
ABGRToYJRow(src_abgr, dst_yj, width);
src_abgr += src_stride_abgr;
dst_yj += dst_stride_yj;
dst_uj += dst_stride_uj;
dst_vj += dst_stride_vj;
}
return 0;
}
// Convert ABGR to J400.
LIBYUV_API
int ABGRToJ400(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_yj,
int dst_stride_yj,
int width,
int height) {
int y;
void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) =
ABGRToYJRow_C;
if (!src_abgr || !dst_yj || width <= 0 || height == 0) {
return -1;
}
if (height < 0) {
height = -height;
src_abgr = src_abgr + (height - 1) * src_stride_abgr;
src_stride_abgr = -src_stride_abgr;
}
// Coalesce rows.
if (src_stride_abgr == width * 4 && dst_stride_yj == width) {
width *= height;
height = 1;
src_stride_abgr = dst_stride_yj = 0;
}
#if defined(HAS_ABGRTOYJROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ABGRToYJRow = ABGRToYJRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_SSSE3;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ABGRToYJRow = ABGRToYJRow_Any_AVX2;
if (IS_ALIGNED(width, 32)) {
ABGRToYJRow = ABGRToYJRow_AVX2;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ABGRToYJRow = ABGRToYJRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_NEON;
}
}
#endif
#if defined(HAS_ABGRTOYJROW_MSA)
if (TestCpuFlag(kCpuHasMSA)) {
ABGRToYJRow = ABGRToYJRow_Any_MSA;
if (IS_ALIGNED(width, 16)) {
ABGRToYJRow = ABGRToYJRow_MSA;
}
}
#endif
for (y = 0; y < height; ++y) {
ABGRToYJRow(src_abgr, dst_yj, width);
src_abgr += src_stride_abgr;
dst_yj += dst_stride_yj;
}
return 0;
}
// Convert ARGB to AR64.
LIBYUV_API
int ARGBToAR64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ar64,
int dst_stride_ar64,
int width,
int height) {
int y;
void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAR64Row_C;
if (!src_argb || !dst_ar64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ar64 = 0;
}
#if defined(HAS_ARGBTOAR64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAR64Row = ARGBToAR64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAR64Row = ARGBToAR64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAR64Row = ARGBToAR64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAR64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAR64Row = ARGBToAR64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAR64Row = ARGBToAR64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAR64Row(src_argb, dst_ar64, width);
src_argb += src_stride_argb;
dst_ar64 += dst_stride_ar64;
}
return 0;
}
// Convert ARGB to AB64.
LIBYUV_API
int ARGBToAB64(const uint8_t* src_argb,
int src_stride_argb,
uint16_t* dst_ab64,
int dst_stride_ab64,
int width,
int height) {
int y;
void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64,
int width) = ARGBToAB64Row_C;
if (!src_argb || !dst_ab64 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
// Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) {
width *= height;
height = 1;
src_stride_argb = dst_stride_ab64 = 0;
}
#if defined(HAS_ARGBTOAB64ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBToAB64Row = ARGBToAB64Row_Any_SSSE3;
if (IS_ALIGNED(width, 4)) {
ARGBToAB64Row = ARGBToAB64Row_SSSE3;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
ARGBToAB64Row = ARGBToAB64Row_Any_AVX2;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_AVX2;
}
}
#endif
#if defined(HAS_ARGBTOAB64ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
ARGBToAB64Row = ARGBToAB64Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToAB64Row = ARGBToAB64Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGBToAB64Row(src_argb, dst_ab64, width);
src_argb += src_stride_argb;
dst_ab64 += dst_stride_ab64;
}
return 0;
}
// Enabled if 1 pass is available
#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_MSA)
#define HAS_RAWTOYJROW
@ -2355,7 +2665,7 @@ int RAWToJNV21(const uint8_t* src_raw,
int halfwidth = (width + 1) >> 1;
#if defined(HAS_RAWTOYJROW)
void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw,
uint8_t* dst_u, uint8_t* dst_v, int width) =
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
RAWToUVJRow_C;
void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) =
RAWToYJRow_C;
@ -2363,12 +2673,12 @@ int RAWToJNV21(const uint8_t* src_raw,
void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) =
RAWToARGBRow_C;
void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb,
uint8_t* dst_u, uint8_t* dst_v, int width) =
uint8_t* dst_uj, uint8_t* dst_vj, int width) =
ARGBToUVJRow_C;
void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) =
ARGBToYJRow_C;
#endif
void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v,
void (*MergeUVRow_)(const uint8_t* src_uj, const uint8_t* src_vj,
uint8_t* dst_vu, int width) = MergeUVRow_C;
if (!src_raw || !dst_y || !dst_vu || width <= 0 || height == 0) {
return -1;
@ -2490,8 +2800,8 @@ int RAWToJNV21(const uint8_t* src_raw,
#endif
{
// Allocate a row of uv.
align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_v = row_u + ((halfwidth + 31) & ~31);
align_buffer_64(row_uj, ((halfwidth + 31) & ~31) * 2);
uint8_t* row_vj = row_uj + ((halfwidth + 31) & ~31);
#if !defined(HAS_RAWTOYJROW)
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 31) & ~31;
@ -2500,15 +2810,15 @@ int RAWToJNV21(const uint8_t* src_raw,
for (y = 0; y < height - 1; y += 2) {
#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, src_stride_raw, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
RAWToUVJRow(src_raw, src_stride_raw, row_uj, row_vj, width);
MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width);
#else
RAWToARGBRow(src_raw, row, width);
RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width);
ARGBToUVJRow(row, kRowSize, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ARGBToUVJRow(row, kRowSize, row_uj, row_vj, width);
MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
ARGBToYJRow(row + kRowSize, dst_y + dst_stride_y, width);
#endif
@ -2518,20 +2828,20 @@ int RAWToJNV21(const uint8_t* src_raw,
}
if (height & 1) {
#if defined(HAS_RAWTOYJROW)
RAWToUVJRow(src_raw, 0, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
RAWToUVJRow(src_raw, 0, row_uj, row_vj, width);
MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
RAWToYJRow(src_raw, dst_y, width);
#else
RAWToARGBRow(src_raw, row, width);
ARGBToUVJRow(row, 0, row_u, row_v, width);
MergeUVRow_(row_v, row_u, dst_vu, halfwidth);
ARGBToUVJRow(row, 0, row_uj, row_vj, width);
MergeUVRow_(row_vj, row_uj, dst_vu, halfwidth);
ARGBToYJRow(row, dst_y, width);
#endif
}
#if !defined(HAS_RAWTOYJROW)
free_aligned_buffer_64(row);
#endif
free_aligned_buffer_64(row_u);
free_aligned_buffer_64(row_uj);
}
return 0;
}

source/planar_functions.cc

@ -915,7 +915,7 @@ int NV21ToNV12(const uint8_t* src_y,
// tile width is 16 and assumed.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of source ignoring tiling. e.g. 640
// TODO: More detile row functions.
// TODO(fbarchard): More detile row functions.
LIBYUV_API
void DetilePlane(const uint8_t* src_y,
@ -1033,6 +1033,66 @@ void DetileSplitUVPlane(const uint8_t* src_uv,
}
}
LIBYUV_API
void DetileToYUY2(const uint8_t* src_y,
int src_stride_y,
const uint8_t* src_uv,
int src_stride_uv,
uint8_t* dst_yuy2,
int dst_stride_yuy2,
int width,
int height,
int tile_height) {
const ptrdiff_t src_y_tile_stride = 16 * tile_height;
const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
int y;
void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
assert(src_stride_y >= 0);
assert(src_stride_y > 0);
assert(src_stride_uv >= 0);
assert(src_stride_uv > 0);
assert(tile_height > 0);
if (width <= 0 || height == 0 || tile_height <= 0) {
return;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2;
}
#if defined(HAS_DETILETOYUY2_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
DetileToYUY2 = DetileToYUY2_Any_NEON;
if (IS_ALIGNED(width, 16)) {
DetileToYUY2 = DetileToYUY2_NEON;
}
}
#endif
// Detile plane
for (y = 0; y < height; ++y) {
DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride,
dst_yuy2, width);
dst_yuy2 += dst_stride_yuy2;
src_y += 16;
if (y & 0x1) {
src_uv += 16;
}
// Advance to next row of tiles.
if ((y & (tile_height - 1)) == (tile_height - 1)) {
src_y = src_y - src_y_tile_stride + src_stride_y * tile_height;
src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2);
}
}
}
// Support function for NV12 etc RGB channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API

source/row_any.cc

@ -959,6 +959,9 @@ ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYJROW_AVX2
ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYJROW_AVX2
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
#endif
@ -983,6 +986,9 @@ ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_SSSE3
ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_SSSE3
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
#endif
@ -998,6 +1004,9 @@ ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_NEON
ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_NEON
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15)
#endif
@ -2013,9 +2022,17 @@ ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31)
#ifdef HAS_ARGBTOUVJROW_AVX2
ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ABGRTOUVJROW_AVX2
ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31)
#endif
#ifdef HAS_ARGBTOUVJROW_SSSE3
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVJROW_SSSE3
ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVROW_SSSE3
ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15)
ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15)
ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15)
ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15)
ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15)
@ -2040,6 +2057,9 @@ ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31)
#ifdef HAS_ARGBTOUVJROW_NEON
ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ABGRTOUVJROW_NEON
ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVJROW_MSA
ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31)
#endif
@ -2229,6 +2249,29 @@ ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15)
ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15)
#endif
#define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK) \
void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, \
const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, \
uint8_t* dst_yuy2, int width) { \
SIMD_ALIGNED(uint8_t temp[16 * 4]); \
memset(temp, 0, 16 * 4); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \
n); \
} \
memcpy(temp, src_y + (n / 16) * src_y_tile_stride, r); \
memcpy(temp + 16, src_uv + (n / 16) * src_uv_tile_stride, r); \
ANY_SIMD(temp, src_y_tile_stride, temp + 16, src_uv_tile_stride, \
temp + 32, r); \
memcpy(dst_yuy2 + 2 * n, temp + 32, 2 * r); \
}
#ifdef HAS_DETILETOYUY2_NEON
ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15)
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

source/row_common.cc

@ -798,6 +798,7 @@ static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
#endif
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
@ -2747,6 +2748,27 @@ void DetileSplitUVRow_C(const uint8_t* src_uv,
}
}
void DetileToYUY2_C(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
for (int x = 0; x < width - 15; x += 16) {
for (int i = 0; i < 8; i++) {
dst_yuy2[0] = src_y[0];
dst_yuy2[1] = src_uv[0];
dst_yuy2[2] = src_y[1];
dst_yuy2[3] = src_uv[1];
dst_yuy2 += 4;
src_y += 2;
src_uv += 2;
}
src_y += src_y_tile_stride - 16;
src_uv += src_uv_tile_stride - 16;
}
}
void SplitRGBRow_C(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,

source/row_gcc.cc

@ -27,6 +27,9 @@ static const uvec8 kARGBToY = {25u, 129u, 66u, 0u, 25u, 129u, 66u, 0u,
static const uvec8 kARGBToYJ = {29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u,
29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u};
static const uvec8 kABGRToYJ = {77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u,
77u, 150u, 29u, 0u, 77u, 150u, 29u, 0u};
static const uvec8 kRGBAToYJ = {0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u,
0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u};
#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3)
@ -39,12 +42,18 @@ static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0,
static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0,
127, -84, -43, 0, 127, -84, -43, 0};
static const vec8 kABGRToUJ = {-43, -84, 127, 0, -43, -84, 127, 0,
-43, -84, 127, 0, -43, -84, 127, 0};
static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0,
-18, -94, 112, 0, -18, -94, 112, 0};
static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0,
-20, -107, 127, 0, -20, -107, 127, 0};
static const vec8 kABGRToVJ = {127, -107, -20, 0, 127, -107, -20, 0,
127, -107, -20, 0, 127, -107, -20, 0};
// Constants for BGRA
static const uvec8 kBGRAToY = {0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u,
0u, 66u, 129u, 25u, 0u, 66u, 129u, 25u};
@ -1398,6 +1407,24 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYJROW_SSSE3
#ifdef HAS_ABGRTOYJROW_SSSE3
// Convert 16 ABGR pixels (64 bytes) to 16 YJ values.
// Same as ABGRToYRow but different coefficients, no add 16.
void ABGRToYJRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
asm volatile(
"movdqa %3,%%xmm4 \n"
"movdqa %4,%%xmm5 \n"
LABELALIGN RGBTOY(xmm5)
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "m"(kABGRToYJ), // %3
"m"(kSub128) // %4
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ABGRTOYJROW_SSSE3
#ifdef HAS_RGBATOYJROW_SSSE3
// Convert 16 RGBA pixels (64 bytes) to 16 YJ values.
// Same as ARGBToYRow but different coefficients, no add 16.
@ -1416,7 +1443,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
}
#endif // HAS_RGBATOYJROW_SSSE3
#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
#if defined(HAS_ARGBTOYROW_AVX2) || defined(HAS_ABGRTOYROW_AVX2) || \
defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
// vpermd for vphaddw + vpackuswb vpermd.
static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7};
#endif
@ -1486,6 +1514,26 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
}
#endif // HAS_ARGBTOYJROW_AVX2
#ifdef HAS_ABGRTOYJROW_AVX2
// Convert 32 ABGR pixels (128 bytes) to 32 Y values.
void ABGRToYJRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
asm volatile(
"vbroadcastf128 %3,%%ymm4 \n"
"vbroadcastf128 %4,%%ymm5 \n"
"vmovdqu %5,%%ymm6 \n"
LABELALIGN RGBTOY_AVX2(ymm5)
: "+r"(src_abgr), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "m"(kABGRToYJ), // %3
"m"(kSub128), // %4
"m"(kPermdARGBToY_AVX) // %5
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif // HAS_ABGRTOYJROW_AVX2
#ifdef HAS_RGBATOYJROW_AVX2
// Convert 32 RGBA pixels (128 bytes) to 32 Y values.
void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
@ -1571,11 +1619,15 @@ void ARGBToUVRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVROW_SSSE3
#ifdef HAS_ARGBTOUVROW_AVX2
#if defined(HAS_ARGBTOUVROW_AVX2) || defined(HAS_ABGRTOUVROW_AVX2) || \
defined(HAS_ARGBTOUVJROW_AVX2) || defined(HAS_ABGRTOUVJROW_AVX2)
// vpshufb for vphaddw + vpackuswb packed to shorts.
static const lvec8 kShufARGBToUV_AVX = {
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15,
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15};
#endif
#if defined(HAS_ARGBTOUVROW_AVX2)
void ARGBToUVRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -1765,6 +1817,71 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVJROW_AVX2
// TODO(fbarchard): Pass kABGRToVJ / kABGRToUJ as matrix
#ifdef HAS_ABGRTOUVJROW_AVX2
void ABGRToUVJRow_AVX2(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
asm volatile(
"vbroadcastf128 %5,%%ymm5 \n"
"vbroadcastf128 %6,%%ymm6 \n"
"vbroadcastf128 %7,%%ymm7 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n"
"vmovdqu 0x40(%0),%%ymm2 \n"
"vmovdqu 0x60(%0),%%ymm3 \n"
"vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n"
"vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n"
"vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n"
"vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n"
"lea 0x80(%0),%0 \n"
"vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n"
"vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n"
"vpavgb %%ymm4,%%ymm0,%%ymm0 \n"
"vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n"
"vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n"
"vpavgb %%ymm4,%%ymm2,%%ymm2 \n"
"vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n"
"vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n"
"vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n"
"vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n"
"vphaddw %%ymm3,%%ymm1,%%ymm1 \n"
"vphaddw %%ymm2,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm5,%%ymm0,%%ymm0 \n"
"vpaddw %%ymm5,%%ymm1,%%ymm1 \n"
"vpsraw $0x8,%%ymm1,%%ymm1 \n"
"vpsraw $0x8,%%ymm0,%%ymm0 \n"
"vpacksswb %%ymm0,%%ymm1,%%ymm0 \n"
"vpermq $0xd8,%%ymm0,%%ymm0 \n"
"vpshufb %8,%%ymm0,%%ymm0 \n"
"vextractf128 $0x0,%%ymm0,(%1) \n"
"vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n"
"lea 0x10(%1),%1 \n"
"sub $0x20,%3 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_abgr), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
: "r"((intptr_t)(src_stride_abgr)), // %4
"m"(kSub128), // %5
"m"(kABGRToVJ), // %6
"m"(kABGRToUJ), // %7
"m"(kShufARGBToUV_AVX) // %8
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7");
}
#endif // HAS_ABGRTOUVJROW_AVX2
#ifdef HAS_ARGBTOUVJROW_SSSE3
void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
int src_stride_argb,
@ -1831,6 +1948,72 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb,
}
#endif // HAS_ARGBTOUVJROW_SSSE3
#ifdef HAS_ABGRTOUVJROW_SSSE3
void ABGRToUVJRow_SSSE3(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
asm volatile(
"movdqa %5,%%xmm3 \n"
"movdqa %6,%%xmm4 \n"
"movdqa %7,%%xmm5 \n"
"sub %1,%2 \n"
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm0 \n"
"movdqu 0x00(%0,%4,1),%%xmm7 \n"
"pavgb %%xmm7,%%xmm0 \n"
"movdqu 0x10(%0),%%xmm1 \n"
"movdqu 0x10(%0,%4,1),%%xmm7 \n"
"pavgb %%xmm7,%%xmm1 \n"
"movdqu 0x20(%0),%%xmm2 \n"
"movdqu 0x20(%0,%4,1),%%xmm7 \n"
"pavgb %%xmm7,%%xmm2 \n"
"movdqu 0x30(%0),%%xmm6 \n"
"movdqu 0x30(%0,%4,1),%%xmm7 \n"
"pavgb %%xmm7,%%xmm6 \n"
"lea 0x40(%0),%0 \n"
"movdqa %%xmm0,%%xmm7 \n"
"shufps $0x88,%%xmm1,%%xmm0 \n"
"shufps $0xdd,%%xmm1,%%xmm7 \n"
"pavgb %%xmm7,%%xmm0 \n"
"movdqa %%xmm2,%%xmm7 \n"
"shufps $0x88,%%xmm6,%%xmm2 \n"
"shufps $0xdd,%%xmm6,%%xmm7 \n"
"pavgb %%xmm7,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
"movdqa %%xmm2,%%xmm6 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm2 \n"
"pmaddubsw %%xmm3,%%xmm1 \n"
"pmaddubsw %%xmm3,%%xmm6 \n"
"phaddw %%xmm2,%%xmm0 \n"
"phaddw %%xmm6,%%xmm1 \n"
"paddw %%xmm5,%%xmm0 \n"
"paddw %%xmm5,%%xmm1 \n"
"psraw $0x8,%%xmm0 \n"
"psraw $0x8,%%xmm1 \n"
"packsswb %%xmm1,%%xmm0 \n"
"movlps %%xmm0,(%1) \n"
"movhps %%xmm0,0x00(%1,%2,1) \n"
"lea 0x8(%1),%1 \n"
"sub $0x10,%3 \n"
"jg 1b \n"
: "+r"(src_abgr), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+rm"(width) // %3
: "r"((intptr_t)(src_stride_abgr)), // %4
"m"(kABGRToVJ), // %5
"m"(kABGRToUJ), // %6
"m"(kSub128) // %7
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
}
#endif // HAS_ABGRTOUVJROW_SSSE3
#ifdef HAS_ARGBTOUV444ROW_SSSE3
void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,
uint8_t* dst_u,

source/row_neon.cc

@ -622,6 +622,61 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
);
}
#if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
"vld1.8 q0, [%0], %4 \n" // Load 16 Y
"pld [%0, 1792] \n"
"vld1.8 q1, [%1], %5 \n" // Load 8 UV
"pld [%1, 1792] \n"
"subs %3, %3, #16 \n"
"vst2.8 {q0, q1}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
"+r"(width) // %3
: "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5
: "cc", "memory", "d0", "d1", "d2", "d3" // Clobber list
);
}
#else
// Read 16 Y, 8 UV, and write 8 YUYV.
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
"vld1.8 q0, [%0], %4 \n" // Load 16 Y
"vld1.8 q1, [%1], %5 \n" // Load 8 UV
"subs %3, %3, #16 \n"
"pld [%0, 1792] \n"
"vzip.8 q0, q1 \n"
"pld [%1, 1792] \n"
"vst1.8 {q0, q1}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
"+r"(width) // %3
: "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5
: "cc", "memory", "q0", "q1", "q2", "q3" // Clobber list
);
}
#endif
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
const uint8_t* src_v,
@ -1762,7 +1817,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
// TODO(fbarchard): Subsample match C code.
// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -1808,6 +1863,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_uj,
uint8_t* dst_vj,
int width) {
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_argb
"vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient
"vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient
"vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient
"vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient
"vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
"vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
"vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #1 \n" // 2x average
"vrshr.u16 q1, q1, #1 \n"
"vrshr.u16 q2, q2, #1 \n"
"subs %4, %4, #16 \n" // 16 processed per loop.
RGBTOUV(q2, q1, q0)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_abgr), // %0
"+r"(src_stride_abgr), // %1
"+r"(dst_uj), // %2
"+r"(dst_vj), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
// TODO(fbarchard): Subsample match C code.
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
@ -2567,6 +2667,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,

source/row_neon64.cc

@ -650,6 +650,62 @@ void DetileSplitUVRow_NEON(const uint8_t* src_uv,
);
}
#if LIBYUV_USE_ST2
// Read 16 Y, 8 UV, and write 8 YUY2
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
"ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
"prfm pldl1keep, [%0, 1792] \n"
"ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs
"prfm pldl1keep, [%1, 1792] \n"
"subs %w3, %w3, #16 \n" // store 8 YUY2
"st2 {v0.16b,v1.16b}, [%2], #32 \n"
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
"+r"(width) // %3
: "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5
: "cc", "memory", "v0", "v1" // Clobber list
);
}
#else
// Read 16 Y, 8 UV, and write 8 YUY2
void DetileToYUY2_NEON(const uint8_t* src_y,
ptrdiff_t src_y_tile_stride,
const uint8_t* src_uv,
ptrdiff_t src_uv_tile_stride,
uint8_t* dst_yuy2,
int width) {
asm volatile(
"1: \n"
"ld1 {v0.16b}, [%0], %4 \n" // load 16 Ys
"ld1 {v1.16b}, [%1], %5 \n" // load 8 UVs
"subs %w3, %w3, #16 \n"
"prfm pldl1keep, [%0, 1792] \n"
"zip1 v2.16b, v0.16b, v1.16b \n"
"prfm pldl1keep, [%1, 1792] \n"
"zip2 v3.16b, v0.16b, v1.16b \n"
"st1 {v2.16b,v3.16b}, [%2], #32 \n" // store 8 YUY2
"b.gt 1b \n"
: "+r"(src_y), // %0
"+r"(src_uv), // %1
"+r"(dst_yuy2), // %2
"+r"(width) // %3
: "r"(src_y_tile_stride), // %4
"r"(src_uv_tile_stride) // %5
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber list
);
}
#endif
#if LIBYUV_USE_ST2
// Reads 16 U's and V's and writes out 16 pairs of UV.
void MergeUVRow_NEON(const uint8_t* src_u,
@ -2144,6 +2200,7 @@ void ARGBToUVRow_NEON(const uint8_t* src_argb,
);
}
// TODO(fbarchard): Subsample match Intel code.
void ARGBToUVJRow_NEON(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -2189,6 +2246,51 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb,
);
}
void ABGRToUVJRow_NEON(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_uj,
uint8_t* dst_vj,
int width) {
const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr;
asm volatile (
"movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2
"movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2
"movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2
"movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2
"movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2
"movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit)
"1: \n"
"ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
"uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts.
"prfm pldl1keep, [%0, 448] \n"
"uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
"uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts.
"ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16
"uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts.
"prfm pldl1keep, [%1, 448] \n"
"uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
"uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts.
"urshr v0.8h, v0.8h, #1 \n" // 2x average
"urshr v1.8h, v1.8h, #1 \n"
"urshr v2.8h, v2.8h, #1 \n"
"subs %w4, %w4, #16 \n" // 16 processed per loop.
RGBTOUV(v2.8h, v1.8h, v0.8h)
"st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U.
"st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V.
"b.gt 1b \n"
: "+r"(src_abgr), // %0
"+r"(src_abgr_1), // %1
"+r"(dst_uj), // %2
"+r"(dst_vj), // %3
"+r"(width) // %4
:
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v20", "v21", "v22", "v23", "v24", "v25"
);
}
void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24,
int src_stride_rgb24,
uint8_t* dst_u,
@ -2812,6 +2914,10 @@ void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants);
}
void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,

source/scale.cc

@ -2039,7 +2039,7 @@ void ScalePlane_16(const uint16_t* src,
}
// Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
dst_stride, src, dst, 0, y, dy, /*wpp=*/1, filtering);
return;
}
if (dst_width <= Abs(src_width) && dst_height <= src_height) {

unit_test/convert_test.cc

@ -1236,6 +1236,8 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1)
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1)
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2)
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1)
TESTATOPLANAR(ABGR, 4, 1, J420, 2, 2)
TESTATOPLANAR(ABGR, 4, 1, J422, 2, 1)
#ifdef LITTLE_ENDIAN_ONLY_TEST
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2)
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2)
@ -1440,6 +1442,7 @@ TESTATOB(ARGB, uint8_t, 4, 4, 1, ARGBMirror, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, BGRA, uint8_t, 4, 4, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, I400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(ABGR, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(RGBA, uint8_t, 4, 4, 1, J400, uint8_t, 1, 1, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RAW, uint8_t, 3, 3, 1)
TESTATOB(ARGB, uint8_t, 4, 4, 1, RGB24, uint8_t, 3, 3, 1)
@ -3042,6 +3045,51 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
TESTPTOB(TestYUY2ToNV12, YUY2ToI420, YUY2ToNV12)
TESTPTOB(TestUYVYToNV12, UYVYToI420, UYVYToNV12)
TEST_F(LibYUVConvertTest, MM21ToYUY2) {
const int kWidth = (benchmark_width_ + 15) & (~15);
const int kHeight = (benchmark_height_ + 31) & (~31);
align_buffer_page_end(orig_y, kWidth * kHeight);
align_buffer_page_end(orig_uv,
2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
align_buffer_page_end(tmp_y, kWidth * kHeight);
align_buffer_page_end(tmp_u, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
align_buffer_page_end(tmp_v, SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
align_buffer_page_end(dst_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
align_buffer_page_end(golden_yuyv, 4 * SUBSAMPLE(kWidth, 2) * kHeight);
MemRandomize(orig_y, kWidth * kHeight);
MemRandomize(orig_uv, 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2));
/* Convert MM21 to YUY2 in 2 steps for reference */
libyuv::MM21ToI420(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2), tmp_y,
kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
SUBSAMPLE(kWidth, 2), kWidth, kHeight);
libyuv::I420ToYUY2(tmp_y, kWidth, tmp_u, SUBSAMPLE(kWidth, 2), tmp_v,
SUBSAMPLE(kWidth, 2), golden_yuyv,
4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
/* Convert MM21 to YUY2 in 1 step */
for (int i = 0; i < benchmark_iterations_; ++i) {
libyuv::MM21ToYUY2(orig_y, kWidth, orig_uv, 2 * SUBSAMPLE(kWidth, 2),
dst_yuyv, 4 * SUBSAMPLE(kWidth, 2), kWidth, kHeight);
}
for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) {
EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]);
}
free_aligned_buffer_page_end(orig_y);
free_aligned_buffer_page_end(orig_uv);
free_aligned_buffer_page_end(tmp_y);
free_aligned_buffer_page_end(tmp_u);
free_aligned_buffer_page_end(tmp_v);
free_aligned_buffer_page_end(dst_yuyv);
free_aligned_buffer_page_end(golden_yuyv);
}
// Transitive test. A to B to C is same as A to C.
// Benchmarks A To B to C for comparison to 1 step, benchmarked elsewhere.
#define TESTPLANARTOEI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, SUB_B, BPP_B, \