From ca577883ae1db4c675030a50909729e0dd69846c Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 9 Jun 2026 11:30:45 -0700 Subject: [PATCH] add bmm detect and vdpphps in util/cpuid Bug: None Change-Id: I9954f96a74e653e3ecd3fbeba533299fa8e57d95 --- Android.mk | 4 - BUILD.gn | 7 + README.chromium | 2 +- docs/environment_variables.md | 1 + docs/getting_started.md | 1 + include/libyuv/convert.h | 39 - include/libyuv/convert_from_argb.h | 13 - include/libyuv/cpu_id.h | 1 + include/libyuv/row.h | 660 +++---- include/libyuv/row_sve.h | 12 +- include/libyuv/version.h | 2 +- libyuv.gyp | 12 + libyuv.gypi | 2 - source/compare.cc | 12 +- source/compare_neon64.cc | 2 +- source/compare_win.cc | 20 +- source/convert.cc | 2890 ++++++++++++++-------------- source/convert_argb.cc | 507 ++--- source/convert_from.cc | 91 +- source/convert_from_argb.cc | 2570 ++++++++++++++++++------- source/convert_to_argb.cc | 221 +-- source/convert_to_i420.cc | 123 +- source/cpu_id.cc | 7 +- source/planar_functions.cc | 666 +++---- source/rotate.cc | 121 +- source/rotate_argb.cc | 7 +- source/rotate_common.cc | 8 +- source/rotate_neon.cc | 20 +- source/rotate_neon64.cc | 20 +- source/rotate_win.cc | 12 +- source/row_any.cc | 193 +- source/row_common.cc | 680 ++----- source/row_gcc.cc | 1949 +++++++++---------- source/row_lasx.cc | 36 +- source/row_lsx.cc | 36 +- source/row_neon.cc | 379 ++-- source/row_neon64.cc | 845 +++++--- source/row_rvv.cc | 32 +- source/row_sme.cc | 7 +- source/row_sve.cc | 7 +- source/row_win.cc | 664 +------ source/scale.cc | 303 +-- source/scale_argb.cc | 369 +++- source/scale_common.cc | 26 +- source/scale_gcc.cc | 62 +- source/scale_rgb.cc | 4 +- source/scale_uv.cc | 132 +- source/scale_win.cc | 20 +- unit_test/basictypes_test.cc | 32 +- unit_test/color_test.cc | 241 +-- unit_test/compare_test.cc | 102 +- unit_test/convert_argb_test.cc | 543 +++--- unit_test/convert_test.cc | 534 +++-- unit_test/cpu_test.cc | 30 +- unit_test/cpu_thread_test.cc | 6 +- 
unit_test/math_test.cc | 72 +- unit_test/planar_test.cc | 1028 +++++----- unit_test/rotate_argb_test.cc | 20 +- unit_test/rotate_test.cc | 40 +- unit_test/scale_argb_test.cc | 100 +- unit_test/scale_plane_test.cc | 237 +-- unit_test/scale_rgb_test.cc | 24 +- unit_test/scale_test.cc | 56 +- unit_test/scale_uv_test.cc | 24 +- unit_test/unit_test.cc | 3 + unit_test/unit_test.h | 5 +- unit_test/video_common_test.cc | 136 +- util/cpuid.c | 35 + util/ssim.cc | 34 +- 69 files changed, 8440 insertions(+), 8659 deletions(-) diff --git a/Android.mk b/Android.mk index a5fb72f63..c83bdb7ff 100644 --- a/Android.mk +++ b/Android.mk @@ -1,7 +1,4 @@ # This is the Android makefile for libyuv for NDK. - -# Ignore this file during non-NDK builds. -ifdef NDK_ROOT LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) @@ -107,4 +104,3 @@ LOCAL_SRC_FILES := \ LOCAL_MODULE := libyuv_unittest include $(BUILD_NATIVE_TEST) -endif # NDK_ROOT diff --git a/BUILD.gn b/BUILD.gn index 2288e24a5..0c0749998 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -22,6 +22,13 @@ declare_args() { config("libyuv_config") { include_dirs = [ "include" ] + if (is_android) { + if (target_cpu == "arm" || target_cpu == "x86") { + ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ] + } else { + ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ] + } + } # Define CHROMIUM to tell cpu_id to avoid sandbox unsafe system calls. defines = [ "CHROMIUM" ] diff --git a/README.chromium b/README.chromium index cc424502a..92d44bc8c 100644 --- a/README.chromium +++ b/README.chromium @@ -1,6 +1,6 @@ Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv/ -Version: 1948 +Version: 1937 Revision: DEPS License: BSD-3-Clause License File: LICENSE diff --git a/docs/environment_variables.md b/docs/environment_variables.md index 3905d65cc..02c04e61b 100644 --- a/docs/environment_variables.md +++ b/docs/environment_variables.md @@ -33,6 +33,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. 
But LIBYUV_DISABLE_AVXVNNI LIBYUV_DISABLE_AVXVNNIINT8 LIBYUV_DISABLE_AMXINT8 + LIBYUV_DISABLE_AVX512BMM ## Arm CPUs diff --git a/docs/getting_started.md b/docs/getting_started.md index 06160bb20..6f5593576 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -72,6 +72,7 @@ Additional commonly used compiler options can be passed to Bazel via `--copt`: bazel build -c opt --config=android_arm64 \ --copt=-DLIBYUV_UNLIMITED_DATA \ + --copt=-DLIBYUV_BIT_EXACT=1 \ --copt=-DENABLE_ROW_TESTS \ //:libyuv_test diff --git a/include/libyuv/convert.h b/include/libyuv/convert.h index 4c4f8f1f9..662337750 100644 --- a/include/libyuv/convert.h +++ b/include/libyuv/convert.h @@ -888,45 +888,6 @@ int ABGRToI420(const uint8_t* src_abgr, int width, int height); -// BGRA little endian (argb in memory) to I422. -LIBYUV_API -int BGRAToI422(const uint8_t* src_bgra, - int src_stride_bgra, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height); - -// ABGR little endian (rgba in memory) to I422. -LIBYUV_API -int ABGRToI422(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height); - -// RGBA little endian (abgr in memory) to I422. -LIBYUV_API -int RGBAToI422(const uint8_t* src_rgba, - int src_stride_rgba, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height); - // RGBA little endian (abgr in memory) to I420. LIBYUV_API int RGBAToI420(const uint8_t* src_rgba, diff --git a/include/libyuv/convert_from_argb.h b/include/libyuv/convert_from_argb.h index d9fac50c9..e8a8d6a4d 100644 --- a/include/libyuv/convert_from_argb.h +++ b/include/libyuv/convert_from_argb.h @@ -245,19 +245,6 @@ int ARGBToI422(const uint8_t* src_argb, int width, int height); -// Convert ABGR To I422. 
-LIBYUV_API -int ABGRToI422(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height); - // RGB to I444 with matrix. See ArgbConstants at the top of this file for usage. LIBYUV_API int ARGBToI422Matrix(const uint8_t* src_argb, diff --git a/include/libyuv/cpu_id.h b/include/libyuv/cpu_id.h index 61a934ce2..c6983fb32 100644 --- a/include/libyuv/cpu_id.h +++ b/include/libyuv/cpu_id.h @@ -60,6 +60,7 @@ static const int kCpuHasAVX10_2 = 0x2000000; static const int kCpuHasAVXVNNI = 0x4000000; static const int kCpuHasAVXVNNIINT8 = 0x8000000; static const int kCpuHasAMXINT8 = 0x10000000; +static const int kCpuHasAVX512BMM = 0x20000000; // These flags are only valid on LOONGARCH processors. static const int kCpuHasLOONGARCH = 0x20; diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 835342acd..3072d8ff9 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -23,11 +23,10 @@ extern "C" { #endif // This module is for Visual C 32/64 bit -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || \ - defined(_M_X86)) -#if ((defined(_MSC_VER) && !defined(__clang__)) || \ - defined(LIBYUV_ENABLE_ROWWIN)) +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__) || \ + defined(_M_X64) || defined(_M_X86)) +#if ((defined(_MSC_VER) && !defined(__clang__)) || defined(LIBYUV_ENABLE_ROWWIN)) #define USE_ROW_WIN #else #define USE_ROW_GCC @@ -37,33 +36,51 @@ extern "C" { // The following are available on clang x86 platforms: #if defined(USE_ROW_GCC) // Conversions: +#define HAS_ARGB1555TOARGBROW_SSE2 +#define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_ARGBSETROW_X86 #define HAS_ARGBSHUFFLEROW_SSSE3 +#define HAS_ARGBTOARGB1555ROW_SSE2 +#define HAS_ARGBTOARGB4444ROW_SSE2 #define HAS_ARGBTORAWROW_SSSE3 #define HAS_ARGBTORGB24ROW_SSSE3 
+#define HAS_ARGBTORGB565DITHERROW_SSE2 +#define HAS_ARGBTORGB565ROW_SSE2 #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 +#define HAS_H422TOARGBROW_SSSE3 +#define HAS_I422TOARGB1555ROW_SSSE3 +#define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 #define HAS_I422TORGB24ROW_SSSE3 -#define HAS_I422TORGBBAROW_SSSE3 +#define HAS_I422TORGB565ROW_SSSE3 +#define HAS_I422TORGBAROW_SSSE3 #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 #define HAS_I444TORGB24ROW_SSSE3 +#define HAS_INTERPOLATEROW_SSSE3 +#define HAS_J400TOARGBROW_SSE2 +#define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 #define HAS_MIRRORROW_SSSE3 +#define HAS_MIRRORSPLITUVROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV12TORGB24ROW_SSSE3 +#define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 #define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 +#define HAS_RGB565TOARGBROW_SSE2 #define HAS_SETROW_ERMS #define HAS_SETROW_X86 #define HAS_SPLITUVROW_SSE2 #define HAS_UYVYTOARGBROW_SSSE3 +#define HAS_UYVYTOUV422ROW_SSE2 +#define HAS_UYVYTOUVROW_SSE2 #define HAS_UYVYTOYROW_SSE2 #define HAS_YUY2TOARGBROW_SSSE3 #define HAS_YUY2TOUV422ROW_SSE2 @@ -122,25 +139,14 @@ extern "C" { // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. 
-#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || \ - defined(_M_X86)) -#define HAS_ARGBMIRRORROW_AVX2 -#define HAS_RGB24MIRRORROW_AVX2 +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__) || \ + defined(_M_X64) || defined(_M_X86)) #define HAS_ARGBTOUVMATRIXROW_AVX2 -#define HAS_RGBTOUVMATRIXROW_AVX2 -#define HAS_RGB565TOUVMATRIXROW_AVX2 -#define HAS_ARGB1555TOUVMATRIXROW_AVX2 -#define HAS_ARGB4444TOUVMATRIXROW_AVX2 #define HAS_MERGEUVROW_AVX2 -#define HAS_MIRRORROW_AVX2 -#define HAS_MIRRORSPLITUVROW_AVX2 -#define HAS_MIRRORUVROW_AVX2 -#define HAS_INTERPOLATEROW_16_AVX2 -#define HAS_INTERPOLATEROW_AVX2 #endif -#if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_GCC) && \ +#if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_GCC) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ defined(GCC_HAS_AVX2)) #define HAS_ARGBCOPYALPHAROW_AVX2 @@ -151,21 +157,28 @@ extern "C" { #define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBTORGB565DITHERROW_AVX2 #define HAS_COPYROW_AVX +#define HAS_H422TOARGBROW_AVX2 #define HAS_HALFFLOATROW_AVX2 +#define HAS_I422TOARGB1555ROW_AVX2 +#define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 #define HAS_I422TORGB24ROW_AVX2 +#define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_I444TOARGBROW_AVX2 #define HAS_I444TORGB24ROW_AVX2 -#define HAS_J400TOARGBROW_AVX2 +#define HAS_INTERPOLATEROW_AVX2 +#define HAS_J422TOARGBROW_AVX2 #define HAS_MIRRORROW_AVX2 -#define HAS_MIRRORSPLITUVROW_AVX2 #define HAS_NV12TOARGBROW_AVX2 #define HAS_NV12TORGB24ROW_AVX2 +#define HAS_NV12TORGB565ROW_AVX2 #define HAS_NV21TOARGBROW_AVX2 #define HAS_NV21TORGB24ROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_UYVYTOARGBROW_AVX2 +#define HAS_UYVYTOUV422ROW_AVX2 +#define HAS_UYVYTOUVROW_AVX2 #define HAS_UYVYTOYROW_AVX2 #define HAS_YUY2TOARGBROW_AVX2 #define HAS_YUY2TOUV422ROW_AVX2 @@ -184,7 +197,7 @@ extern "C" { // The following are available for 
gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_GCC) && \ - (defined(__x86_64__) || defined(__i386__)) && \ + (defined(__x86_64__) || defined(__i386__)) && \ !defined(LIBYUV_ENABLE_ROWWIN) #define HAS_AB64TOARGBROW_SSSE3 #define HAS_ABGRTOAR30ROW_SSSE3 @@ -222,11 +235,8 @@ extern "C" { #define HAS_P410TOAR30ROW_SSSE3 #define HAS_P410TOARGBROW_SSSE3 #define HAS_RAWTOARGBROW_AVX2 -#define HAS_RGB24TOARGBROW_AVX2 -#define HAS_RGB565TOARGBROW_AVX2 -#define HAS_ARGB1555TOARGBROW_AVX2 -#define HAS_ARGB4444TOARGBROW_AVX2 #define HAS_RAWTORGBAROW_SSSE3 +#define HAS_RGB24MIRRORROW_SSSE3 #define HAS_RGBATOYJROW_SSSE3 #define HAS_SPLITARGBROW_SSE2 #define HAS_SPLITARGBROW_SSSE3 @@ -241,11 +251,16 @@ extern "C" { #define HAS_ARGBTOYROW_SSSE3 #define HAS_ARGBTOYMATRIXROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 +#define HAS_ABGRTOYROW_SSSE3 +#define HAS_RGBATOYROW_SSSE3 // TODO: adjust row_win to use 8 bit negative coefficients. #define HAS_ABGRTOUVJROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 +#define HAS_ABGRTOUVROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 +#define HAS_BGRATOUVROW_SSSE3 +#define HAS_RGBATOUVROW_SSSE3 #define HAS_ARGBTOUVMATRIXROW_SSSE3 #define HAS_ARGBTOUV444MATRIXROW_SSSE3 @@ -260,8 +275,8 @@ extern "C" { // The following are available for AVX2 gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_GCC) && \ - (defined(__x86_64__) || defined(__i386__)) && \ - (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \ + (defined(__x86_64__) || defined(__i386__)) && \ + (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) && \ !defined(LIBYUV_ENABLE_ROWWIN) #define HAS_AB64TOARGBROW_AVX2 #define HAS_ABGRTOAR30ROW_AVX2 @@ -281,18 +296,10 @@ extern "C" { #define HAS_ARGBTOUVJROW_AVX2 #define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOUVMATRIXROW_AVX2 -#define HAS_RGBTOUVMATRIXROW_AVX2 -#define HAS_RGB565TOUVMATRIXROW_AVX2 -#define HAS_ARGB1555TOUVMATRIXROW_AVX2 
-#define HAS_ARGB4444TOUVMATRIXROW_AVX2 #define HAS_ARGBTOUV444MATRIXROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBTOYMATRIXROW_AVX2 -#define HAS_RGBTOYMATRIXROW_AVX2 -#define HAS_RGB565TOYMATRIXROW_AVX2 -#define HAS_ARGB1555TOYMATRIXROW_AVX2 -#define HAS_ARGB4444TOYMATRIXROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 @@ -343,34 +350,24 @@ extern "C" { #endif // This module is for Visual C 32/64 bit -#if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_WIN) && \ - (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || \ - defined(_M_X86)) && \ - ((defined(_MSC_VER) && !defined(__clang__)) || \ +#if !defined(LIBYUV_DISABLE_X86) && defined(USE_ROW_WIN) && \ + (defined(__x86_64__) || defined(__i386__) || \ + defined(_M_X64) || defined(_M_X86)) && \ + ((defined(_MSC_VER) && !defined(__clang__)) || \ defined(LIBYUV_ENABLE_ROWWIN)) #define HAS_RAWTOARGBROW_AVX2 -#define HAS_RGB24TOARGBROW_AVX2 -#define HAS_RGB565TOARGBROW_AVX2 -#define HAS_ARGB1555TOARGBROW_AVX2 -#define HAS_ARGB4444TOARGBROW_AVX2 -#define HAS_ARGBSHUFFLEROW_AVX2 #if defined(__x86_64__) || defined(_M_X64) #define HAS_RAWTOARGBROW_AVX512BW #define HAS_RGB24TOARGBROW_AVX512BW -#define HAS_ARGBSHUFFLEROW_AVX512BW #endif #define HAS_ARGBTOYROW_AVX2 #define HAS_ARGBTOYMATRIXROW_AVX2 -#define HAS_RGBTOYMATRIXROW_AVX2 -#define HAS_RGB565TOYMATRIXROW_AVX2 -#define HAS_ARGB1555TOYMATRIXROW_AVX2 -#define HAS_ARGB4444TOYMATRIXROW_AVX2 -#define HAS_ARGBTOUV444MATRIXROW_AVX2 #define HAS_ABGRTOYROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ABGRTOYJROW_AVX2 #define HAS_RGBATOYJROW_AVX2 -#define HAS_J400TOARGBROW_AVX2 +#define HAS_RGBATOYROW_AVX2 +#define HAS_BGRATOYROW_AVX2 #endif // The following are available for AVX512 clang x86 platforms: @@ -386,6 +383,7 @@ extern "C" { #endif #define HAS_ARGBTORGB24ROW_AVX512VBMI #define HAS_CONVERT16TO8ROW_AVX512BW +#define HAS_MERGEUVROW_AVX512BW #endif // The 
following are available for AVX512 clang x64 platforms: @@ -393,23 +391,14 @@ extern "C" { #if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) && \ (defined(CLANG_HAS_AVX512)) #define HAS_I422TOARGBROW_AVX512BW -#define HAS_ARGBSHUFFLEROW_AVX512BW #define HAS_ARGBTOUV444ROW_AVX512BW #define HAS_ARGBTOUV444MATRIXROW_AVX512BW #define HAS_ARGBTOYROW_AVX512BW #define HAS_ARGBTOYMATRIXROW_AVX512BW -#define HAS_I422TORGB24ROW_AVX512VBMI -#define HAS_I422TORGB24ROW_AVX512BW #define HAS_ARGBTOUVJ444ROW_AVX512BW #define HAS_ARGBTOUVROW_AVX512BW #define HAS_ARGBTOUVJROW_AVX512BW #define HAS_ARGBTOUVMATRIXROW_AVX512BW -#define HAS_J400TOARGBROW_AVX512BW -#define HAS_MERGEUVROW_AVX512BW -#define HAS_MIRRORROW_AVX512BW -#define HAS_MIRRORSPLITUVROW_AVX512BW -#define HAS_SPLITUVROW_AVX512BW -#define HAS_RGBTOUVMATRIXROW_AVX512BW #endif // The following are available on Neon platforms: @@ -445,21 +434,14 @@ extern "C" { #define HAS_ARGBTOUVJROW_NEON #if !defined(__GNUC__) || defined(__clang__) #define HAS_ARGBTOUVMATRIXROW_NEON -#define HAS_RGBTOUVMATRIXROW_NEON -#define HAS_RGB565TOUVMATRIXROW_NEON -#define HAS_ARGB1555TOUVMATRIXROW_NEON -#define HAS_ARGB4444TOUVMATRIXROW_NEON #endif #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #if !defined(__aarch64__) #define HAS_ARGBTOYMATRIXROW_NEON -#define HAS_RGB565TOYMATRIXROW_NEON -#define HAS_ARGB1555TOYMATRIXROW_NEON -#define HAS_ARGB4444TOYMATRIXROW_NEON -#define HAS_RGBTOYMATRIXROW_NEON #endif #define HAS_ARGBTOYROW_NEON +#define HAS_AYUVTOUVROW_NEON #define HAS_AYUVTOVUROW_NEON #define HAS_AYUVTOYROW_NEON #define HAS_BGRATOUVROW_NEON @@ -533,6 +515,7 @@ extern "C" { #define HAS_SWAPUVROW_NEON #define HAS_UNPACKMT2T_NEON #define HAS_UYVYTOARGBROW_NEON +#define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON #define HAS_UYVYTOYROW_NEON #define HAS_YUY2TOARGBROW_NEON @@ -599,7 +582,6 @@ extern "C" { #define HAS_ARGBTOUVJ444ROW_NEON_I8MM #define HAS_ARGBTOUVJROW_NEON_I8MM #define HAS_ARGBTOUVMATRIXROW_NEON_I8MM 
-#define HAS_RGBTOUVMATRIXROW_NEON #define HAS_ARGBTOUVROW_NEON_I8MM #define HAS_BGRATOUVROW_NEON_I8MM #define HAS_RGBATOUVROW_NEON_I8MM @@ -1050,13 +1032,10 @@ struct ArgbConstants { #endif -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) +#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) #define align_buffer_64(var, size) \ - size_t var##_mem_size = (size); /* NOLINT */ \ - void* var##_mem = (var##_mem_size > SIZE_MAX - 63) \ - ? NULL \ - : malloc(var##_mem_size + 63); /* NOLINT */ \ + void* var##_mem = malloc((size) + 63); /* NOLINT */ \ uint8_t* var = (uint8_t*)(((intptr_t)var##_mem + 63) & ~63) /* NOLINT */ #define free_aligned_buffer_64(var) \ @@ -1106,17 +1085,26 @@ struct ArgbConstants { #define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B"); #else /* Visual C */ -#define IACA_UD_BYTES {__asm _emit 0x0F __asm _emit 0x0B} +#define IACA_UD_BYTES \ + { __asm _emit 0x0F __asm _emit 0x0B } #define IACA_SSC_MARK(x) \ - {__asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90} + { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 } #define IACA_VC64_START __writegsbyte(111, 111); #define IACA_VC64_END __writegsbyte(222, 222); #endif -#define IACA_START {IACA_UD_BYTES IACA_SSC_MARK(111)} -#define IACA_END {IACA_SSC_MARK(222) IACA_UD_BYTES} +#define IACA_START \ + { \ + IACA_UD_BYTES \ + IACA_SSC_MARK(111) \ + } +#define IACA_END \ + { \ + IACA_SSC_MARK(222) \ + IACA_UD_BYTES \ + } void I210AlphaToARGBRow_NEON(const uint16_t* src_y, const uint16_t* src_u, @@ -1828,9 +1816,9 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c); void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); + uint8_t* dst_y, + int width, + const struct ArgbConstants* c); void ARGBToUV444MatrixRow_Any_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -2180,76 +2168,12 @@ void ARGBToUVMatrixRow_C(const uint8_t* 
src_argb, uint8_t* dst_v, int width, const struct ArgbConstants* c); -void RGBToYMatrixRow_C(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void RGBToUVMatrixRow_C(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); void ARGBToUVMatrixRow_SSSE3(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c); -void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGBToUVMatrixRow_Any_AVX2(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGBToUVMatrixRow_AVX512BW(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGBToUVMatrixRow_Any_AVX512BW(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); 
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -2271,135 +2195,6 @@ void ARGBToYMatrixRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c); -void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void RGBToYMatrixRow_Any_AVX2(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void RGB565ToYMatrixRow_Any_AVX2(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToYMatrixRow_Any_AVX2(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void ARGB1555ToUVMatrixRow_Any_AVX2(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB4444ToYMatrixRow_Any_AVX2(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void ARGB4444ToUVMatrixRow_Any_AVX2(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565, - int 
src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToUVMatrixRow_Any_AVX2(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToYMatrixRow_Any_NEON(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToYMatrixRow_Any_NEON(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB1555ToUVMatrixRow_Any_NEON(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void ARGB4444ToYMatrixRow_Any_NEON(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void ARGB4444ToUVMatrixRow_Any_NEON(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGB565ToUVMatrixRow_Any_NEON(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, 
- uint8_t* dst_v, - int width, - const struct ArgbConstants* c); - void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width, @@ -2426,23 +2221,6 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, int width, const struct ArgbConstants* c); -void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); -void RGBToYMatrixRow_Any_NEON(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c); -void RGBToUVMatrixRow_Any_NEON(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c); - void ARGBToYMatrixRow_NEON_DotProd(const uint8_t* src_argb, uint8_t* dst_y, int width, @@ -2473,6 +2251,7 @@ void ARGBToYMatrixRow_Any_LASX(const uint8_t* src_argb, int width, const struct ArgbConstants* c); + void ARGBToUV444MatrixRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, @@ -2530,29 +2309,15 @@ void RGBAToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width); void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); -void ARGBToYRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ARGBToYJRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ABGRToYRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void ABGRToYJRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void ARGBToYRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ARGBToYJRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void ABGRToYJRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* 
dst_ptr, int width); void RGBAToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGBAToYRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void RGBAToYJRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void RGBAToYRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGBAToYJRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void BGRAToYRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void BGRAToYRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -3152,16 +2917,12 @@ void ARGBToUVJ444Row_C(const uint8_t* src_argb, uint8_t* dst_v, int width); -void MirrorRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_LSX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_LASX(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); -void MirrorRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); @@ -3175,18 +2936,15 @@ void MirrorUVRow_LSX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_LASX(const uint8_t* src_uv, uint8_t* dst_uv, int width); void 
MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width); void MirrorUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void MirrorUVRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_LSX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void MirrorSplitUVRow_AVX512BW(const uint8_t* src, - uint8_t* dst_u, - uint8_t* dst_v, - int width); -void MirrorSplitUVRow_AVX2(const uint8_t* src, - uint8_t* dst_u, - uint8_t* dst_v, - int width); +void MirrorSplitUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, + int width); void MirrorSplitUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, @@ -3222,16 +2980,16 @@ void ARGBMirrorRow_Any_LASX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24, - uint8_t* dst_rgb24, - int width); +void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, + uint8_t* dst_rgb24, + int width); void RGB24MirrorRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width); void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width); -void RGB24MirrorRow_Any_AVX2(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void RGB24MirrorRow_Any_SSSE3(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void RGB24MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -3244,10 +3002,6 @@ void SplitUVRow_SSE2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); -void SplitUVRow_AVX512BW(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void SplitUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, @@ -3264,10 +3018,6 @@ void SplitUVRow_RVV(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); -void SplitUVRow_Any_AVX512BW(const uint8_t* 
src_ptr, - uint8_t* dst_u, - uint8_t* dst_v, - int width); void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, @@ -4242,10 +3992,6 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); -void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width); void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, @@ -4266,10 +4012,6 @@ void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); -void ARGBShuffleRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - const uint8_t* param, - int width); void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, @@ -4288,15 +4030,12 @@ void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, int width); void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width); -void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width); -void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, - uint8_t* dst_argb, - int width); +void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width); void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); - +void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); @@ -4383,18 +4122,9 @@ void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); -void 
RGB24ToARGBRow_Any_AVX2(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void RAWToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); -void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width); -void RGB24ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void RAWToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); +void RGB24ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGBARow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4402,6 +4132,15 @@ void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); +void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4460,7 +4199,9 @@ void ARGB4444ToARGBRow_Any_LASX(const uint8_t* src_ptr, void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); - +void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width); +void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width); void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); @@ -4473,7 +4214,10 @@ void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, uint32_t dither4, int width); - +void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, + uint8_t* dst, + uint32_t dither4, + int width); void 
ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, uint32_t dither4, @@ -4635,15 +4379,15 @@ void AB64ToARGBRow_Any_NEON(const uint16_t* src_ptr, uint8_t* dst_ptr, int width); -void J400ToARGBRow_AVX512BW(const uint8_t* src_y, uint8_t* dst_argb, int width); +void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_LSX(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); -void J400ToARGBRow_Any_AVX512BW(const uint8_t* src_ptr, - uint8_t* dst_ptr, - int width); +void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -4858,12 +4602,6 @@ void I444ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_AVX512BW(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_argb, - const struct YuvConstants* yuvconstants, - int width); void I444ToRGB24Row_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -5043,7 +4781,11 @@ void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); - +void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); void NV12ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, @@ -5062,7 +4804,11 @@ void NV21ToYUV24Row_AVX2(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_yuv24, int width); - +void NV12ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_uv, + uint8_t* 
dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); void NV21ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* vu_buf, uint8_t* dst_argb, @@ -5137,7 +4883,42 @@ void I422ToRGBARow_SSSE3(const uint8_t* y_buf, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); - +void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb4444, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb1555, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_SSSE3(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_AVX2(const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_rgb565, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB24Row_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -5150,18 +4931,6 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGB24Row_AVX512BW(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width); void 
I422ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -5198,12 +4967,6 @@ void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I444ToARGBRow_Any_AVX512BW(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I444ToRGB24Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -5404,7 +5167,16 @@ void NV21ToYUV24Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); - +void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* uv_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, @@ -5467,7 +5239,42 @@ void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); - +void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, + const uint8_t* 
u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); +void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf, + const uint8_t* u_buf, + const uint8_t* v_buf, + uint8_t* dst_ptr, + const struct YuvConstants* yuvconstants, + int width); void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, @@ -5480,18 +5287,6 @@ void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); -void I422ToRGB24Row_Any_AVX512VBMI(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); -void I422ToRGB24Row_Any_AVX512BW(const uint8_t* y_buf, - const uint8_t* u_buf, - const uint8_t* v_buf, - uint8_t* dst_ptr, - const struct YuvConstants* yuvconstants, - int width); void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, @@ -5746,7 +5541,15 @@ void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); - +void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); +void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + int width); void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); @@ -5760,7 +5563,10 @@ void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr, void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); - +void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, + uint8_t* dst_ptr, + const uint32_t param, + int width); void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, @@ -6819,6 +6625,11 @@ void InterpolateRow_C(uint8_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); +void 
InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int dst_width, + int source_y_fraction); void InterpolateRow_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -6849,6 +6660,11 @@ void InterpolateRow_Any_NEON(uint8_t* dst_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); +void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride_ptr, + int width, + int source_y_fraction); void InterpolateRow_Any_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, @@ -6865,16 +6681,6 @@ void InterpolateRow_16_C(uint16_t* dst_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); -void InterpolateRow_16_AVX2(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction); -void InterpolateRow_16_Any_AVX2(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction); void InterpolateRow_16_NEON(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, diff --git a/include/libyuv/row_sve.h b/include/libyuv/row_sve.h index 280d635b9..f7e2123a7 100644 --- a/include/libyuv/row_sve.h +++ b/include/libyuv/row_sve.h @@ -631,8 +631,8 @@ static inline void I422ToRGB565Row_SVE_SC( // Calculate a predicate for the final iteration to deal with the tail. "cnth %[vl] \n" "whilelt p1.b, wzr, %w[width] \n" // - READYUV422_SVE_2X I422TORGB_SVE_2X - RGBTOARGB8_SVE_TOP_2X RGB8TORGB565_SVE_FROM_TOP_2X + READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X + RGB8TORGB565_SVE_FROM_TOP_2X // Need to permute the data on the final iteration such that the // predicates (.b) line up with the 16-bit element data. "trn1 z20.b, z18.b, z19.b \n" @@ -694,8 +694,8 @@ static inline void I422ToARGB1555Row_SVE_SC( // Calculate a predicate for the final iteration to deal with the tail. 
"cnth %[vl] \n" "whilelt p1.b, wzr, %w[width] \n" // - READYUV422_SVE_2X I422TORGB_SVE_2X - RGBTOARGB8_SVE_TOP_2X RGB8TOARGB1555_SVE_FROM_TOP_2X + READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X + RGB8TOARGB1555_SVE_FROM_TOP_2X "st2h {z0.h, z1.h}, p1, [%[dst]] \n" "99: \n" @@ -753,8 +753,8 @@ static inline void I422ToARGB4444Row_SVE_SC( // Calculate a predicate for the final iteration to deal with the tail. "cnth %[vl] \n" "whilelt p1.b, wzr, %w[width] \n" // - READYUV422_SVE_2X I422TORGB_SVE_2X - RGBTOARGB8_SVE_TOP_2X RGB8TOARGB4444_SVE_FROM_TOP_2X + READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X + RGB8TOARGB4444_SVE_FROM_TOP_2X "st2h {z0.h, z1.h}, p1, [%[dst]] \n" "99: \n" diff --git a/include/libyuv/version.h b/include/libyuv/version.h index 9f9d18da7..f384c1efb 100644 --- a/include/libyuv/version.h +++ b/include/libyuv/version.h @@ -11,6 +11,6 @@ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 1948 +#define LIBYUV_VERSION 1937 #endif // INCLUDE_LIBYUV_VERSION_H_ diff --git a/libyuv.gyp b/libyuv.gyp index fa4b146a4..394840216 100644 --- a/libyuv.gyp +++ b/libyuv.gyp @@ -122,6 +122,18 @@ 'include', '.', ], + 'conditions': [ + ['OS == "android" and target_arch == "arm64"', { + 'ldflags': [ + '-Wl,--dynamic-linker,/system/bin/linker64', + ], + }], + ['OS == "android" and target_arch != "arm64"', { + 'ldflags': [ + '-Wl,--dynamic-linker,/system/bin/linker', + ], + }], + ], #conditions }, 'sources': [ '<@(libyuv_sources)', diff --git a/libyuv.gypi b/libyuv.gypi index 5cf173ef3..44b127410 100644 --- a/libyuv.gypi +++ b/libyuv.gypi @@ -69,7 +69,6 @@ 'source/row_lsx.cc', 'source/row_neon.cc', 'source/row_neon64.cc', - 'source/row_rvv.cc', 'source/row_win.cc', 'source/scale.cc', 'source/scale_any.cc', @@ -80,7 +79,6 @@ 'source/scale_neon.cc', 'source/scale_neon64.cc', 'source/scale_rgb.cc', - 'source/scale_rvv.cc', 'source/scale_uv.cc', 'source/scale_win.cc', 'source/video_common.cc', diff --git 
a/source/compare.cc b/source/compare.cc index 10023301c..e85cc6d07 100644 --- a/source/compare.cc +++ b/source/compare.cc @@ -11,7 +11,6 @@ #include "libyuv/compare.h" #include -#include #include #ifdef _OPENMP #include @@ -107,11 +106,8 @@ uint32_t ARGBDetect(const uint8_t* argb, uint32_t fourcc = 0; int h; - if (!argb || width <= 0 || height <= 0) { - return fourcc; - } // Coalesce rows. - if (stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (stride_argb == width * 4) { width *= height; height = 1; stride_argb = 0; @@ -249,12 +245,8 @@ uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, int height) { uint64_t sse = 0; int h; - if (!src_a || !src_b || width <= 0 || height <= 0) { - return sse; - } // Coalesce rows. - if (stride_a == width && stride_b == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (stride_a == width && stride_b == width) { width *= height; height = 1; stride_a = stride_b = 0; diff --git a/source/compare_neon64.cc b/source/compare_neon64.cc index 36c5e575c..756f83cb3 100644 --- a/source/compare_neon64.cc +++ b/source/compare_neon64.cc @@ -116,7 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) { uint32_t hash = seed; const uint32_t c16 = 0x92d9e201; // 33^16 uint32_t tmp, tmp2; - asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n" + asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n" "ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n" // count is always a multiple of 16. 
diff --git a/source/compare_win.cc b/source/compare_win.cc index 59374cd8a..9d5bb27cd 100644 --- a/source/compare_win.cc +++ b/source/compare_win.cc @@ -41,9 +41,8 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a, return diff; } -__declspec(naked) uint32_t SumSquareError_SSE2(const uint8_t* src_a, - const uint8_t* src_b, - int count) { +__declspec(naked) uint32_t + SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b @@ -82,9 +81,8 @@ __declspec(naked) uint32_t SumSquareError_SSE2(const uint8_t* src_a, #ifdef HAS_SUMSQUAREERROR_AVX2 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. #pragma warning(disable : 4752) -__declspec(naked) uint32_t SumSquareError_AVX2(const uint8_t* src_a, - const uint8_t* src_b, - int count) { +__declspec(naked) uint32_t + SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b @@ -148,9 +146,8 @@ uvec32 kHashMul3 = { 0x00000001, // 33 ^ 0 }; -__declspec(naked) uint32_t HashDjb2_SSE41(const uint8_t* src, - int count, - uint32_t seed) { +__declspec(naked) uint32_t + HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { __asm { mov eax, [esp + 4] // src mov ecx, [esp + 8] // count @@ -200,9 +197,8 @@ __declspec(naked) uint32_t HashDjb2_SSE41(const uint8_t* src, // Visual C 2012 required for AVX2. 
#ifdef HAS_HASHDJB2_AVX2 -__declspec(naked) uint32_t HashDjb2_AVX2(const uint8_t* src, - int count, - uint32_t seed) { +__declspec(naked) uint32_t + HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) { __asm { mov eax, [esp + 4] // src mov ecx, [esp + 8] // count diff --git a/source/convert.cc b/source/convert.cc index fbef68f57..0b90ffaaf 100644 --- a/source/convert.cc +++ b/source/convert.cc @@ -10,12 +10,10 @@ #include "libyuv/convert.h" -#include - #include "libyuv/basic_types.h" -#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" +#include "libyuv/convert_from_argb.h" #include "libyuv/rotate.h" #include "libyuv/row.h" #include "libyuv/scale.h" // For ScalePlane() @@ -24,7 +22,6 @@ #ifdef __cplusplus namespace libyuv { - extern const struct ArgbConstants kArgbI601Constants; extern const struct ArgbConstants kArgbJPEGConstants; extern "C" { @@ -56,16 +53,16 @@ static int I4xxToI420(const uint8_t* src_y, int src_y_height, int src_uv_width, int src_uv_height) { - int r; - if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || - src_y_width <= 0 || src_y_height == 0 || src_y_height == INT_MIN || - src_uv_width <= 0 || src_uv_height == 0) { - return -1; - } const int dst_y_width = src_y_width; const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || + src_y_width <= 0 || src_y_height == 0 || src_uv_width <= 0 || + src_uv_height == 0) { + return -1; + } if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, src_y_width, src_y_height); @@ -99,16 +96,16 @@ int I420Copy(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative 
height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -142,16 +139,16 @@ int I010Copy(const uint16_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -183,20 +180,20 @@ static int Planar16bitTo8bit(const uint16_t* src_y, int subsample_x, int subsample_y, int depth) { - if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); int scale = 1 << (24 - depth); + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } // Negative height means invert the image. 
if (height < 0) { height = -height; uv_height = -uv_height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(uv_height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(uv_height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (uv_height - 1) * src_stride_u; + src_v = src_v + (uv_height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -232,15 +229,15 @@ static int I41xToI420(const uint16_t* src_y, int depth) { const int scale = 1 << (24 - depth); - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -277,15 +274,15 @@ static int I21xToI420(const uint16_t* src_y, int depth) { const int scale = 1 << (24 - depth); - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -532,17 +529,17 @@ static int Ix10ToI010(const uint16_t* src_y, int height, int subsample_x, int subsample_y) { - int r; - if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } const int dst_y_width = width; const int dst_y_height = Abs(height); const int src_uv_width = SUBSAMPLE(width, subsample_x, subsample_x); const int src_uv_height = SUBSAMPLE(height, subsample_y, subsample_y); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); + int r; + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } if (dst_y) { CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } @@ -612,11 +609,11 @@ static int IxxxToPxxx(const uint16_t* src_y, int subsample_x, int subsample_y, int depth) { - if (width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); + if (width <= 0 || height == 0) { + return -1; + } ConvertToMSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height, depth); @@ -665,16 +662,16 @@ int I010ToNV12(const uint16_t* src_y, void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; if ((!src_y && dst_y) || !src_u || !src_v || !dst_uv || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // 
Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -888,15 +885,15 @@ int I422ToI210(const uint8_t* src_y, int height) { int halfwidth = (width + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -927,39 +924,29 @@ int I422ToNV21(const uint8_t* src_y, int dst_stride_vu, int width, int height) { - int r; - if (width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; // Negative height means invert the image. 
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } // Allocate u and v buffers - const uint64_t plane_size = (uint64_t)halfwidth * halfheight; - if (plane_size > SIZE_MAX / 2) - return 1; - align_buffer_64(plane_u, (size_t)plane_size * 2); + align_buffer_64(plane_u, halfwidth * halfheight * 2); + uint8_t* plane_v = plane_u + halfwidth * halfheight; if (!plane_u) return 1; - uint8_t* plane_v = plane_u + (size_t)plane_size; - r = I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, - dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, - width, height); - if (r != 0) { - return r; - } + I422ToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, + dst_y, dst_stride_y, plane_u, halfwidth, plane_v, halfwidth, width, + height); MergeUVPlane(plane_v, halfwidth, plane_u, halfwidth, dst_vu, dst_stride_vu, halfwidth, halfheight); free_aligned_buffer_64(plane_u); @@ -1053,7 +1040,7 @@ int MT2TToP010(const uint8_t* src_y, int dst_stride_uv, int width, int height) { - if (width <= 0 || height == 0 || height == INT_MIN || !src_uv || !dst_uv) { + if (width <= 0 || !height || !src_uv || !dst_uv) { return -1; } @@ -1084,10 +1071,10 @@ int MT2TToP010(const uint8_t* src_y, height = -height; uv_height = (height + 1) / 2; if (dst_y) { - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } - dst_uv = dst_uv + (ptrdiff_t)(uv_height - 1) * dst_stride_uv; + dst_uv = dst_uv + (uv_height - 1) * dst_stride_uv; dst_stride_uv = -dst_stride_uv; } @@ 
-1153,16 +1140,16 @@ int I422ToNV21(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_vu || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -1217,6 +1204,14 @@ int I422ToNV21(const uint8_t* src_y, MergeUVRow = MergeUVRow_RVV; } #endif +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -1317,15 +1312,15 @@ int I444ToNV12(const uint8_t* src_y, int width, int height) { if ((!src_y && dst_y) || !src_u || !src_v || !dst_uv || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -1370,15 +1365,14 @@ int I400ToI420(const uint8_t* src_y, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if ((!src_y && dst_y) || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } if (dst_y) { @@ -1401,15 +1395,14 @@ int I400ToNV21(const uint8_t* src_y, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if ((!src_y && dst_y) || !dst_vu || width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !dst_vu || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } if (dst_y) { @@ -1437,29 +1430,27 @@ int NV12ToI420(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_uv || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_uv = src_uv + (ptrdiff_t)(halfheight - 1) * src_stride_uv; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; src_stride_y = -src_stride_y; src_stride_uv = -src_stride_uv; } // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; } // Coalesce rows. if (src_stride_uv == halfwidth * 2 && dst_stride_u == halfwidth && - dst_stride_v == halfwidth && - (ptrdiff_t)halfwidth * halfheight <= INT_MAX) { + dst_stride_v == halfwidth) { halfwidth *= halfheight; halfheight = 1; src_stride_uv = dst_stride_u = dst_stride_v = 0; @@ -1507,8 +1498,7 @@ int NV12ToNV24(const uint8_t* src_y, int width, int height) { int r; - if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -1533,8 +1523,7 @@ int NV16ToNV24(const uint8_t* src_y, int width, int height) { int r; - if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -1565,7 +1554,7 @@ static int PxxxToIxxx(const uint16_t* src_y, const int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); const int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); if (!src_y || !dst_y || !src_uv || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } ConvertToLSBPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height, @@ -1623,8 +1612,7 @@ int P010ToP410(const uint16_t* src_y, int width, int height) { int r; - if ((!src_y && dst_y) || !src_uv || !dst_uv || 
width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -1649,8 +1637,7 @@ int P210ToP410(const uint16_t* src_y, int width, int height) { int r; - if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -1680,13 +1667,10 @@ int YUY2ToI420(const uint8_t* src_yuy2, YUY2ToUVRow_C; void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } // Negative height means invert the image. if (height < 0) { height = -height; - src_yuy2 = src_yuy2 + (ptrdiff_t)(height - 1) * src_stride_yuy2; + src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } #if defined(HAS_YUY2TOYROW_SSE2) @@ -1774,13 +1758,10 @@ int UYVYToI420(const uint8_t* src_uyvy, UYVYToUVRow_C; void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } // Negative height means invert the image. if (height < 0) { height = -height; - src_uyvy = src_uyvy + (ptrdiff_t)(height - 1) * src_stride_uyvy; + src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } #if defined(HAS_UYVYTOYROW_SSE2) @@ -1875,13 +1856,10 @@ int AYUVToNV12(const uint8_t* src_ayuv, uint8_t* dst_uv, int width) = AYUVToUVRow_C; void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) = AYUVToYRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_ayuv = src_ayuv + (ptrdiff_t)(height - 1) * src_stride_ayuv; + src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv; src_stride_ayuv = -src_stride_ayuv; } // place holders for future intel code @@ -1955,13 +1933,10 @@ int AYUVToNV21(const uint8_t* src_ayuv, uint8_t* dst_vu, int width) = AYUVToVURow_C; void (*AYUVToYRow)(const uint8_t* src_ayuv, uint8_t* dst_y, int width) = AYUVToYRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } // Negative height means invert the image. if (height < 0) { height = -height; - src_ayuv = src_ayuv + (ptrdiff_t)(height - 1) * src_stride_ayuv; + src_ayuv = src_ayuv + (height - 1) * src_stride_ayuv; src_stride_ayuv = -src_stride_ayuv; } // place holders for future intel code @@ -2055,7 +2030,7 @@ int ARGBToI420Matrix(const uint8_t* src_argb, void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; +ARGBToUVMatrixRow_C; #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -2119,40 +2094,10 @@ int ARGBToI420Matrix(const uint8_t* src_argb, } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } 
- } -#endif #if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } @@ -2172,15 +2117,61 @@ int ARGBToI420Matrix(const uint8_t* src_argb, ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; } } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } #endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } @@ -2206,7 +2197,7 @@ int ARGBToI420Matrix(const uint8_t* src_argb, // Convert ARGB to I420 with Alpha // The following version calls ARGBExtractAlpha on the full image. LIBYUV_API -int ARGBToI420Alpha(const uint8_t* src_argb, +int ARGBToI420AlphaMatrix(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, @@ -2217,19 +2208,19 @@ int ARGBToI420Alpha(const uint8_t* src_argb, uint8_t* dst_a, int dst_stride_a, int width, - int height) { - int r = ARGBToI420(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, width, height); + int height, + const struct ArgbConstants* argbconstants) { + int r = ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, argbconstants, width, height); if (r == 0) { r = ARGBExtractAlpha(src_argb, src_stride_argb, dst_a, dst_stride_a, width, height); } return r; } -#else // USE_EXTRACTALPHA -// Convert ARGB to I420 with Alpha + LIBYUV_API -int ARGBToI420Alpha(const uint8_t* src_argb, +int ARGBToI420AlphaMatrix(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, @@ -2240,139 +2231,181 @@ int ARGBToI420Alpha(const uint8_t* src_argb, uint8_t* dst_a, int dst_stride_a, int width, - int height) { + int height, + const struct ArgbConstants* argbconstants) { + return ARGBToI420AlphaMatrix(src_argb, src_stride_argb, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, + dst_a, dst_stride_a, width, height, + &kArgbI601Constants); +} +#else // USE_EXTRACTALPHA +// Convert ARGB to I420 with Alpha +LIBYUV_API +int ARGBToI420AlphaMatrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + uint8_t* 
dst_a, + int dst_stride_a, + int width, + int height, + const struct ArgbConstants* argbconstants) { int y; - void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYRow_C; + void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUVMatrixRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, int width) = ARGBExtractAlphaRow_C; if (!src_argb || !dst_y || !dst_u || !dst_v || !dst_a || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0 || !argbconstants) { return -1; } + +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + }
+#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } +#endif // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYRow = ARGBToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVRow = ARGBToUVRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - ARGBToUVRow = ARGBToUVRow_Any_SVE2; - if (IS_ALIGNED(width, 2)) { - ARGBToUVRow = ARGBToUVRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - ARGBToUVRow = ARGBToUVRow_Any_SME; - if (IS_ALIGNED(width, 2)) { - ARGBToUVRow = ARGBToUVRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if 
(IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYRow = ARGBToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYRow = ARGBToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVRow = ARGBToUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVRow = ARGBToUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYRow = ARGBToYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYROW_LASX) && defined(HAS_ARGBTOUVROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYRow = ARGBToYRow_Any_LASX; - ARGBToUVRow = ARGBToUVRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_LASX; - ARGBToUVRow = ARGBToUVRow_LASX; - } - } -#endif #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? 
ARGBExtractAlphaRow_SSE2 @@ -2404,9 +2437,10 @@ int ARGBToI420Alpha(const uint8_t* src_argb, #endif for (y = 0; y < height - 1; y += 2) { - ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); + ARGBToUVMatrixRow(src_argb, src_stride_argb, dst_u, dst_v, width, argbconstants); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, + argbconstants); ARGBExtractAlphaRow(src_argb, dst_a, width); ARGBExtractAlphaRow(src_argb + src_stride_argb, dst_a + dst_stride_a, width); @@ -2417,12 +2451,31 @@ int ARGBToI420Alpha(const uint8_t* src_argb, dst_a += dst_stride_a * 2; } if (height & 1) { - ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); + ARGBToUVMatrixRow(src_argb, 0, dst_u, dst_v, width, argbconstants); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); ARGBExtractAlphaRow(src_argb, dst_a, width); } return 0; } + +LIBYUV_API +int ARGBToI420Alpha(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + uint8_t* dst_a, + int dst_stride_a, + int width, + int height) { + return ARGBToI420AlphaMatrix(src_argb, src_stride_argb, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, + dst_a, dst_stride_a, width, height, + &kArgbI601Constants); +} #endif // USE_EXTRACTALPHA // Convert BGRA to I420. 
@@ -2437,60 +2490,147 @@ int BGRAToI420(const uint8_t* src_bgra, int dst_stride_v, int width, int height) { - return ARGBToI420Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kBgraI601Constants, width, height); -} + int y; + void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, + uint8_t* dst_u, uint8_t* dst_v, int width) = + BGRAToUVRow_C; + void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) = + BGRAToYRow_C; + if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_bgra = src_bgra + (height - 1) * src_stride_bgra; + src_stride_bgra = -src_stride_bgra; + } +#if defined(HAS_BGRATOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + BGRAToYRow = BGRAToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_NEON; + } + } +#endif +#if defined(HAS_BGRATOYROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + BGRAToYRow = BGRAToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_BGRATOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + BGRAToUVRow = BGRAToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_NEON; + } + } +#endif +#if defined(HAS_BGRATOUVROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + BGRAToUVRow = BGRAToUVRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_BGRATOUVROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + BGRAToUVRow = BGRAToUVRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + BGRAToUVRow = BGRAToUVRow_SVE2; + } + } +#endif +#if defined(HAS_BGRATOUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + BGRAToUVRow = BGRAToUVRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + BGRAToUVRow = BGRAToUVRow_SME; + } + } +#endif +#if defined(HAS_BGRATOYROW_SSSE3) + if 
(TestCpuFlag(kCpuHasSSSE3)) { + BGRAToYRow = BGRAToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_SSSE3; + } + } +#endif +#if defined(HAS_BGRATOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + BGRAToUVRow = BGRAToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_BGRATOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + BGRAToYRow = BGRAToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + BGRAToYRow = BGRAToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + BGRAToYRow = BGRAToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + BGRAToYRow = BGRAToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_BGRATOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + BGRAToUVRow = BGRAToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + BGRAToUVRow = BGRAToUVRow_AVX2; + } + } +#endif +#if defined(HAS_BGRATOYROW_LSX) && defined(HAS_BGRATOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + BGRAToYRow = BGRAToYRow_Any_LSX; + BGRAToUVRow = BGRAToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + BGRAToYRow = BGRAToYRow_LSX; + BGRAToUVRow = BGRAToUVRow_LSX; + } + } +#endif +#if defined(HAS_BGRATOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + BGRAToYRow = BGRAToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + BGRAToYRow = BGRAToYRow_LASX; + } + } +#endif +#if defined(HAS_BGRATOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + BGRAToYRow = BGRAToYRow_RVV; + } +#endif -// Convert BGRA to I422. -LIBYUV_API -int BGRAToI422(const uint8_t* src_bgra, - int src_stride_bgra, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - return ARGBToI422Matrix(src_bgra, src_stride_bgra, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kBgraI601Constants, width, height); -} - -// Convert ABGR to I422. 
-LIBYUV_API -int ABGRToI422(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kAbgrI601Constants, width, height); -} - -// Convert RGBA to I422. -LIBYUV_API -int RGBAToI422(const uint8_t* src_rgba, - int src_stride_rgba, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - return ARGBToI422Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kRgbaI601Constants, width, height); + for (y = 0; y < height - 1; y += 2) { + BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); + BGRAToYRow(src_bgra, dst_y, width); + BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width); + src_bgra += src_stride_bgra * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width); + BGRAToYRow(src_bgra, dst_y, width); + } + return 0; } // Convert ABGR to I420. @@ -2505,9 +2645,147 @@ int ABGRToI420(const uint8_t* src_abgr, int dst_stride_v, int width, int height) { - return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kAbgrI601Constants, width, height); + int y; + void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = + ABGRToYRow_C; + if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYRow = ABGRToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYRow = ABGRToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToYRow = ABGRToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToYRow = ABGRToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVRow = ABGRToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYRow = ABGRToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ABGRToYRow = ABGRToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVRow = ABGRToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ABGRToUVRow = ABGRToUVRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + ABGRToUVRow = ABGRToUVRow_Any_SVE2; + if 
(IS_ALIGNED(width, 2)) { + ABGRToUVRow = ABGRToUVRow_SVE2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + ABGRToUVRow = ABGRToUVRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + ABGRToUVRow = ABGRToUVRow_SME; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LSX) && defined(HAS_ABGRTOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + ABGRToUVRow = ABGRToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + ABGRToUVRow = ABGRToUVRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); + ABGRToYRow(src_abgr, dst_y, width); + ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); + src_abgr += src_stride_abgr * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width); + ABGRToYRow(src_abgr, dst_y, width); + } + return 0; } // Convert RGBA to I420. @@ -2522,16 +2800,334 @@ int RGBAToI420(const uint8_t* src_rgba, int dst_stride_v, int width, int height) { - return ARGBToI420Matrix(src_rgba, src_stride_rgba, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kRgbaI601Constants, width, height); + int y; + void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, + uint8_t* dst_u, uint8_t* dst_v, int width) = + RGBAToUVRow_C; + void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) = + RGBAToYRow_C; + if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; + src_stride_rgba = -src_stride_rgba; + } +#if defined(HAS_RGBATOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGBAToYRow = RGBAToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGBAToYRow = RGBAToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGBAToYRow = RGBAToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RGBAToYRow = RGBAToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RGBAToYRow = RGBAToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_RGBATOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGBAToUVRow = RGBAToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGBAToUVRow = RGBAToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_RGBATOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGBAToYRow = RGBAToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_NEON; + } + } +#endif +#if defined(HAS_RGBATOYROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + RGBAToYRow = RGBAToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_RGBATOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGBAToUVRow = RGBAToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + RGBAToUVRow = RGBAToUVRow_NEON; + } + } +#endif +#if defined(HAS_RGBATOUVROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + RGBAToUVRow = RGBAToUVRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + RGBAToUVRow = RGBAToUVRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_RGBATOUVROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RGBAToUVRow = RGBAToUVRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + RGBAToUVRow = RGBAToUVRow_SVE2; + } + } +#endif +#if defined(HAS_RGBATOUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + RGBAToUVRow = RGBAToUVRow_Any_SME; + if 
(IS_ALIGNED(width, 2)) { + RGBAToUVRow = RGBAToUVRow_SME; + } + } +#endif +#if defined(HAS_RGBATOYROW_LSX) && defined(HAS_RGBATOUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGBAToYRow = RGBAToYRow_Any_LSX; + RGBAToUVRow = RGBAToUVRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGBAToYRow = RGBAToYRow_LSX; + RGBAToUVRow = RGBAToUVRow_LSX; + } + } +#endif +#if defined(HAS_RGBATOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGBAToYRow = RGBAToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGBAToYRow = RGBAToYRow_LASX; + } + } +#endif +#if defined(HAS_RGBATOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGBAToYRow = RGBAToYRow_RVV; + } +#endif + + for (y = 0; y < height - 1; y += 2) { + RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); + RGBAToYRow(src_rgba, dst_y, width); + RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width); + src_rgba += src_stride_rgba * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width); + RGBAToYRow(src_rgba, dst_y, width); + } + return 0; } -// Enabled if 1 pass is available -#if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_LSX) || \ - defined(HAS_RGB24TOYROW_RVV)) -#define HAS_RGB24TOYROW +// Any RGB to I420 with Matrix +static int RGBToI420Matrix(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + const struct ArgbConstants* argbconstants, + void (*RGBToARGBRow)(const uint8_t* src_rgb, + uint8_t* dst_argb, + int width)) { + int y; + void (*ARGBToUVMatrixRow)(const uint8_t* src_argb0, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUVMatrixRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + +#if
defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } #endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } +#endif + + if (!src_rgb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_rgb = src_rgb + (height - 1) * src_stride_rgb; + src_stride_rgb = -src_stride_rgb; + } + + { + // Allocate 2 rows of ARGB. 
+ const int row_size = (width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2); + if (!row) + return 1; + + for (y = 0; y < height - 1; y += 2) { + RGBToARGBRow(src_rgb, row, width); + RGBToARGBRow(src_rgb + src_stride_rgb, row + row_size, width); + ARGBToUVMatrixRow(row, row_size, dst_u, dst_v, width, argbconstants); + ARGBToYMatrixRow(row, dst_y, width, argbconstants); + ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, argbconstants); + src_rgb += src_stride_rgb * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + RGBToARGBRow(src_rgb, row, width); + ARGBToUVMatrixRow(row, 0, dst_u, dst_v, width, argbconstants); + ARGBToYMatrixRow(row, dst_y, width, argbconstants); + } + free_aligned_buffer_64(row); + } + return 0; +} // Convert RGB24 to I420. LIBYUV_API @@ -2545,171 +3141,8 @@ int RGB24ToI420(const uint8_t* src_rgb24, int dst_stride_v, int width, int height) { - int y; - void (*RGBToUVMatrixRow)(const uint8_t* src_rgb, int src_stride_rgb, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = RGBToUVMatrixRow_C; - void (*RGBToYMatrixRow)(const uint8_t* src_rgb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = RGBToYMatrixRow_C; - -#if defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGBToYMatrixRow = RGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGBToYMatrixRow = RGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_RGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_RGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_RGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - 
RGBToUVMatrixRow = RGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_RGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGBToYMatrixRow = RGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBToYMatrixRow = RGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_RGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGBToYMatrixRow = RGBToYMatrixRow_LSX; // This uses the NEON/LSX names - } -#endif - - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (ptrdiff_t)(height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - - for (y = 0; y < height - 1; y += 2) { - RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width, - &kArgbI601Constants); - RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants); - RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width, - &kArgbI601Constants); - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants); - RGBToUVMatrixRow(src_rgb24, 0, dst_u, dst_v, width, &kArgbI601Constants); - } - return 0; -} -#undef HAS_RGB24TOYROW - -// Enabled if 1 pass is available -#if defined(HAS_RGB24TOYJROW_NEON) || defined(HAS_RGB24TOYJROW_RVV) -#define HAS_RGB24TOYJROW -#endif - -// Convert RGB24 to J420. 
-LIBYUV_API -int RGB24ToJ420(const uint8_t* src_rgb24, - int src_stride_rgb24, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - int y; -#if defined(HAS_RGB24TOYJROW) - void (*RGB24ToUVJRow)(const uint8_t* src_rgb24, int src_stride_rgb24, - uint8_t* dst_u, uint8_t* dst_v, int width) = - RGB24ToUVJRow_C; - void (*RGB24ToYJRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = - RGB24ToYJRow_C; -#else void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; - void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYJRow_C; -#endif - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (ptrdiff_t)(height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - -#if defined(HAS_RGB24TOYJROW) - -// Neon version does direct RGB24 to YUV. 
-#if defined(HAS_RGB24TOYJROW_NEON) && defined(HAS_RGB24TOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToUVJRow = RGB24ToUVJRow_Any_NEON; - RGB24ToYJRow = RGB24ToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToYJRow = RGB24ToYJRow_NEON; - RGB24ToUVJRow = RGB24ToUVJRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToYJRow = RGB24ToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToYJRow = RGB24ToYJRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToYJRow = RGB24ToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToYJRow = RGB24ToYJRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToYJRow = RGB24ToYJRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RGB24 to ARGB. -#else // HAS_RGB24TOYJROW - #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; @@ -2768,102 +3201,94 @@ int RGB24ToJ420(const uint8_t* src_rgb24, RGB24ToARGBRow = RGB24ToARGBRow_RVV; } #endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYJRow = ARGBToYJRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = 
ARGBToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; - } - } -#endif -#endif // HAS_RGB24TOYJROW - { -#if !defined(HAS_RGB24TOYJROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); - if (!row) - return 1; -#endif - - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB24TOYJROW) - RGB24ToUVJRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); - RGB24ToYJRow(src_rgb24, dst_y, width); - RGB24ToYJRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + row_size, width); - ARGBToUVJRow(row, row_size, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); -#endif - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RGB24TOYJROW) - RGB24ToUVJRow(src_rgb24, 0, dst_u, dst_v, width); - RGB24ToYJRow(src_rgb24, dst_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - ARGBToUVJRow(row, 0, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RGB24TOYJROW) - free_aligned_buffer_64(row); -#endif - } - return 0; + return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, width, + height, &kArgbI601Constants, RGB24ToARGBRow); } -#undef HAS_RGB24TOYJROW -// Enabled if 1 pass is available -#if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_LSX) || \ - defined(HAS_RAWTOYROW_RVV)) -#define HAS_RAWTOYROW +// Convert RGB24 to J420: picks an RGB24ToARGBRow variant for this CPU and width, then calls RGBToI420Matrix with kArgbJPEGConstants.
+LIBYUV_API +int RGB24ToJ420(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height) { + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RGB24ToARGBRow_C; +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } #endif +#if defined(HAS_RGB24TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RGB24ToARGBRow = RGB24ToARGBRow_SVE2; + } +#endif +#if defined(HAS_RGB24TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToARGBRow = RGB24ToARGBRow_RVV; + } +#endif + + return RGBToI420Matrix(src_rgb24, src_stride_rgb24, dst_y, dst_stride_y, + dst_u, dst_stride_u, dst_v, dst_stride_v, width, + height, &kArgbJPEGConstants, RGB24ToARGBRow); +} // Convert RAW to
I420. LIBYUV_API -int RAWToI420(const uint8_t* src_rgb24, - int src_stride_rgb24, +int RAWToI420(const uint8_t* src_raw, + int src_stride_raw, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, @@ -2872,93 +3297,71 @@ int RAWToI420(const uint8_t* src_rgb24, int dst_stride_v, int width, int height) { - int y; - void (*RGBToUVMatrixRow)(const uint8_t* src_rgb, int src_stride_rgb, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = RGBToUVMatrixRow_C; - void (*RGBToYMatrixRow)(const uint8_t* src_rgb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = RGBToYMatrixRow_C; - -#if defined(HAS_RGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGBToYMatrixRow = RGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGBToYMatrixRow = RGBToYMatrixRow_AVX2; + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = + RAWToARGBRow_C; +#if defined(HAS_RAWTOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RAWToARGBRow = RAWToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; } } #endif -#if defined(HAS_RGBTOUVMATRIXROW_AVX2) +#if defined(HAS_RAWTOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_Any_AVX2; + RAWToARGBRow = RAWToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_AVX2; + RAWToARGBRow = RAWToARGBRow_AVX2; } } #endif -#if defined(HAS_RGBTOUVMATRIXROW_AVX512BW) +#if defined(HAS_RAWTOARGBROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_Any_AVX512BW; + RAWToARGBRow = RAWToARGBRow_Any_AVX512BW; if (IS_ALIGNED(width, 64)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_AVX512BW; + RAWToARGBRow = RAWToARGBRow_AVX512BW; } } #endif -#if defined(HAS_RGBTOUVMATRIXROW_NEON) +#if defined(HAS_RAWTOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBToUVMatrixRow = RGBToUVMatrixRow_NEON; + RAWToARGBRow 
= RAWToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RAWToARGBRow = RAWToARGBRow_NEON; } } #endif -#if defined(HAS_RGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGBToYMatrixRow = RGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBToYMatrixRow = RGBToYMatrixRow_NEON; - } +#if defined(HAS_RAWTOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RAWToARGBRow = RAWToARGBRow_SVE2; } #endif -#if defined(HAS_RGBTOYMATRIXROW_LSX) +#if defined(HAS_RAWTOARGBROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { - RGBToYMatrixRow = RGBToYMatrixRow_LSX; // This uses the NEON/LSX names + RAWToARGBRow = RAWToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RAWToARGBRow = RAWToARGBRow_LSX; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RAWToARGBRow = RAWToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RAWToARGBRow = RAWToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RAWTOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RAWToARGBRow = RAWToARGBRow_RVV; } #endif - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (ptrdiff_t)(height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - - for (y = 0; y < height - 1; y += 2) { - RGBToUVMatrixRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width, - &kArgbI601Constants); - RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants); - RGBToYMatrixRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width, - &kArgbI601Constants); - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - RGBToYMatrixRow(src_rgb24, dst_y, width, &kArgbI601Constants); - RGBToUVMatrixRow(src_rgb24, 0, dst_u, dst_v, width, &kArgbI601Constants); - } - return 0; + return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, + &kArgbI601Constants, RAWToARGBRow); } -#undef HAS_RAWTOYROW - -// Enabled if 1 pass is available -#if defined(HAS_RAWTOYJROW_NEON) || defined(HAS_RAWTOYJROW_RVV) -#define HAS_RAWTOYJROW -#endif // Convert RAW to J420. LIBYUV_API @@ -2972,75 +3375,8 @@ int RAWToJ420(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - int y; -#if defined(HAS_RAWTOYJROW) - void (*RAWToUVJRow)(const uint8_t* src_raw, int src_stride_raw, - uint8_t* dst_u, uint8_t* dst_v, int width) = - RAWToUVJRow_C; - void (*RAWToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = - RAWToYJRow_C; -#else void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; - void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width) = - ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = - ARGBToYJRow_C; -#endif - if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - -#if defined(HAS_RAWTOYJROW) - -// Neon version does direct RAW to YUV. -#if defined(HAS_RAWTOYJROW_NEON) && defined(HAS_RAWTOUVJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RAWToUVJRow = RAWToUVJRow_Any_NEON; - RAWToYJRow = RAWToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_NEON; - RAWToUVJRow = RAWToUVJRow_NEON; - } - } -#endif -#if defined(HAS_RAWTOYJROW_LSX) && defined(HAS_RAWTOUVJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RAWToUVJRow = RAWToUVJRow_Any_LSX; - RAWToYJRow = RAWToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RAWToYJRow = RAWToYJRow_LSX; - RAWToUVJRow = RAWToUVJRow_LSX; - } - } -#endif -#if defined(HAS_RAWTOYJROW_LASX) && defined(HAS_RAWTOUVJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RAWToUVJRow = RAWToUVJRow_Any_LASX; - RAWToYJRow = RAWToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RAWToYJRow = RAWToYJRow_LASX; - RAWToUVJRow = RAWToUVJRow_LASX; - } - } -#endif -#if defined(HAS_RAWTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RAWToYJRow = RAWToYJRow_RVV; - } -#endif - -// Other platforms do intermediate conversion from RAW to ARGB. 
-#else // HAS_RAWTOYJROW - #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -3099,85 +3435,160 @@ int RAWToJ420(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJRow = ARGBToUVJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJRow = ARGBToUVJRow_AVX2; - } - } -#endif -#endif // HAS_RAWTOYJROW + return RGBToI420Matrix(src_raw, src_stride_raw, dst_y, dst_stride_y, dst_u, + dst_stride_u, dst_v, dst_stride_v, width, height, + &kArgbJPEGConstants, RAWToARGBRow); +} + + +// Any RGB to I444 with Matrix. Returns 0 on success, -1 on invalid args, 1 if the temporary ARGB row cannot be allocated. +static int RGBToI444Matrix(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_u, + int dst_stride_u, + uint8_t* dst_v, + int dst_stride_v, + int width, + int height, + const struct ArgbConstants* argbconstants, + void (*RGBToARGBRow)(const uint8_t* src_rgb, + uint8_t* dst_argb, + int width)) { + int y; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + void (*ARGBToUV444MatrixRow)(const uint8_t* src_argb, uint8_t* dst_u, + uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUV444MatrixRow_C; + +#if defined(HAS_ARGBTOUV444MATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { +
ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUV444MATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUV444MATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUV444MATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + ARGBToUV444MatrixRow = ARGBToUV444MatrixRow_NEON; + } + } +#endif + +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if
defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + + if (!src_rgb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || height == INT_MIN) { + return -1; + } + if (height < 0) { + height = -height; + src_rgb = src_rgb + (ptrdiff_t)(height - 1) * src_stride_rgb; + src_stride_rgb = -src_stride_rgb; + } { -#if !defined(HAS_RAWTOYJROW) - // Allocate 2 rows of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size * 2); + // Allocate a row of ARGB. + const int row_size = width * 4; + align_buffer_64(row, row_size); if (!row) return 1; -#endif - for (y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, src_stride_raw, dst_u, dst_v, width); - RAWToYJRow(src_raw, dst_y, width); - RAWToYJRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); -#else - RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + row_size, width); - ARGBToUVJRow(row, row_size, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); - ARGBToYJRow(row + row_size, dst_y + dst_stride_y, width); -#endif - src_raw += src_stride_raw * 2; - dst_y += dst_stride_y * 2; + for (y = 0; y < height; ++y) { + RGBToARGBRow(src_rgb, row, width); + ARGBToUV444MatrixRow(row, dst_u, dst_v, width, argbconstants); + ARGBToYMatrixRow(row, dst_y, width, argbconstants); + src_rgb += src_stride_rgb; + dst_y += dst_stride_y; dst_u += dst_stride_u; dst_v += dst_stride_v; } - if (height & 1) { -#if defined(HAS_RAWTOYJROW) - RAWToUVJRow(src_raw, 0, dst_u, dst_v, width); -
RAWToYJRow(src_raw, dst_y, width); -#else - RAWToARGBRow(src_raw, row, width); - ARGBToUVJRow(row, 0, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RAWTOYJROW) free_aligned_buffer_64(row); -#endif } return 0; } -#undef HAS_RAWTOYJROW -// RAW big endian (rgb in memory) to I444 // 2 step conversion of RAWToARGB then ARGBToY and ARGBToUV444 LIBYUV_API int RAWToI444(const uint8_t* src_raw, @@ -3190,142 +3601,8 @@ int RAWToI444(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - int y; - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; - void (*ARGBToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = - ARGBToYRow_C; - void (*ARGBToUV444Row)(const uint8_t* src_raw, uint8_t* dst_u, uint8_t* dst_v, - int width) = ARGBToUV444Row_C; - if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - if (height < 0) { - height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - // TODO: add row coalesce when main loop handles large width in blocks - // TODO: implement UV444 or trim the ifdef below -#if defined(HAS_ARGBTOUV444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUV444ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUV444Row = ARGBToUV444Row_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUV444Row = ARGBToUV444Row_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUV444ROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUV444Row = ARGBToUV444Row_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUV444Row = ARGBToUV444Row_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUV444ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUV444Row = ARGBToUV444Row_Any_NEON; - if (IS_ALIGNED(width,
8)) { - ARGBToUV444Row = ARGBToUV444Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUV444ROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUV444Row = ARGBToUV444Row_Any_NEON_I8MM; - if (IS_ALIGNED(width, 8)) { - ARGBToUV444Row = ARGBToUV444Row_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUV444ROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUV444Row = ARGBToUV444Row_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUV444ROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUV444Row = ARGBToUV444Row_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUV444Row = ARGBToUV444Row_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYRow = ARGBToYRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYRow = ARGBToYRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYRow = ARGBToYRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYRow = ARGBToYRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYRow = ARGBToYRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_LASX; - 
} - } -#endif -#if defined(HAS_ARGBTOYROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYRow = ARGBToYRow_RVV; - } -#endif - + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -3384,26 +3661,10 @@ int RAWToI444(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif - - { - // Allocate a row of ARGB. - const int row_size = width * 4; - align_buffer_64(row, row_size); - if (!row) - return 1; - - for (y = 0; y < height; ++y) { - RAWToARGBRow(src_raw, row, width); - ARGBToUV444Row(row, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - src_raw += src_stride_raw; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - free_aligned_buffer_64(row); - } - return 0; + return RGBToI444Matrix(src_raw, src_stride_raw, + dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, + &kArgbI601Constants, RAWToARGBRow); } // RAW big endian (rgb in memory) to J444 @@ -3419,133 +3680,8 @@ int RAWToJ444(const uint8_t* src_raw, int dst_stride_v, int width, int height) { - int y; - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; - void (*ARGBToYJRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = - ARGBToYJRow_C; - void (*ARGBToUVJ444Row)(const uint8_t* src_raw, uint8_t* dst_u, - uint8_t* dst_v, int width) = ARGBToUVJ444Row_C; - if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - if (height < 0) { - height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - // TODO: add row coalesce when main loop handles large width in blocks -#if defined(HAS_ARGBTOUVJ444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - 
ARGBToUVJ444Row = ARGBToUVJ444Row_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVJ444ROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVJ444ROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOUVJ444ROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVJ444ROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_NEON_I8MM; - if (IS_ALIGNED(width, 8)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVJ444ROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_LSX; - } - } -#endif -#if defined(HAS_ARGBTOUVJ444ROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToUVJ444Row = ARGBToUVJ444Row_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYJRow 
= ARGBToYJRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYJRow = ARGBToYJRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYJRow = ARGBToYJRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYJRow = ARGBToYJRow_RVV; - } -#endif - + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -3604,26 +3740,10 @@ int RAWToJ444(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif - - { - // Allocate a row of ARGB. - const int row_size = width * 4; - align_buffer_64(row, row_size); - if (!row) - return 1; - - for (y = 0; y < height; ++y) { - RAWToARGBRow(src_raw, row, width); - ARGBToUVJ444Row(row, dst_u, dst_v, width); - ARGBToYJRow(row, dst_y, width); - src_raw += src_stride_raw; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - free_aligned_buffer_64(row); - } - return 0; + return RGBToI444Matrix(src_raw, src_stride_raw, + dst_y, dst_stride_y, dst_u, dst_stride_u, + dst_v, dst_stride_v, width, height, + &kArgbJPEGConstants, RAWToARGBRow); } // Convert RGB565 to I420. 
@@ -3638,77 +3758,30 @@ int RGB565ToI420(const uint8_t* src_rgb565, int dst_stride_v, int width, int height) { - int y; - void (*RGB565ToUVMatrixRow)(const uint8_t* src_rgb565, int src_stride_rgb565, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - RGB565ToUVMatrixRow_C; - void (*RGB565ToYMatrixRow)(const uint8_t* src_rgb565, uint8_t* dst_y, - int width, const struct ArgbConstants* c) = - RGB565ToYMatrixRow_C; - -#if defined(HAS_RGB565TOYMATRIXROW_AVX2) + void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RGB565ToARGBRow_C; +#if defined(HAS_RGB565TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } + } +#endif +#if defined(HAS_RGB565TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - RGB565ToYMatrixRow = RGB565ToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB565ToYMatrixRow = RGB565ToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_RGB565TOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB565ToUVMatrixRow = RGB565ToUVMatrixRow_Any_AVX2; + RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - RGB565ToUVMatrixRow = RGB565ToUVMatrixRow_AVX2; + RGB565ToARGBRow = RGB565ToARGBRow_AVX2; } } #endif -#if defined(HAS_RGB565TOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB565ToUVMatrixRow = RGB565ToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB565ToUVMatrixRow = RGB565ToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_RGB565TOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB565ToYMatrixRow = RGB565ToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB565ToYMatrixRow = RGB565ToYMatrixRow_NEON; - } - } -#endif - - if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_rgb565 = src_rgb565 + (ptrdiff_t)(height - 1) * src_stride_rgb565; - src_stride_rgb565 = -src_stride_rgb565; - } - - for (y = 0; y < height - 1; y += 2) { - RGB565ToUVMatrixRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width, - &kArgbI601Constants); - RGB565ToYMatrixRow(src_rgb565, dst_y, width, &kArgbI601Constants); - RGB565ToYMatrixRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, - width, &kArgbI601Constants); - src_rgb565 += src_stride_rgb565 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - RGB565ToYMatrixRow(src_rgb565, dst_y, width, &kArgbI601Constants); - RGB565ToUVMatrixRow(src_rgb565, 0, dst_u, dst_v, width, - &kArgbI601Constants); - } - return 0; + return RGBToI420Matrix(src_rgb565, src_stride_rgb565, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, width, height, + &kArgbI601Constants, RGB565ToARGBRow); } + // Convert ARGB1555 to I420. LIBYUV_API int ARGB1555ToI420(const uint8_t* src_argb1555, @@ -3721,77 +3794,30 @@ int ARGB1555ToI420(const uint8_t* src_argb1555, int dst_stride_v, int width, int height) { - int y; - void (*ARGB1555ToUVMatrixRow)( - const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, - uint8_t* dst_v, int width, const struct ArgbConstants* c) = - ARGB1555ToUVMatrixRow_C; - void (*ARGB1555ToYMatrixRow)(const uint8_t* src_argb1555, uint8_t* dst_y, - int width, const struct ArgbConstants* c) = - ARGB1555ToYMatrixRow_C; - -#if defined(HAS_ARGB1555TOYMATRIXROW_AVX2) + void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = ARGB1555ToARGBRow_C; +#if defined(HAS_ARGB1555TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; + } + } +#endif +#if defined(HAS_ARGB1555TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGB1555ToYMatrixRow = 
ARGB1555ToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGB1555ToYMatrixRow = ARGB1555ToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGB1555TOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGB1555ToUVMatrixRow = ARGB1555ToUVMatrixRow_Any_AVX2; + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { - ARGB1555ToUVMatrixRow = ARGB1555ToUVMatrixRow_AVX2; + ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2; } } #endif -#if defined(HAS_ARGB1555TOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGB1555ToUVMatrixRow = ARGB1555ToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToUVMatrixRow = ARGB1555ToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGB1555TOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGB1555ToYMatrixRow = ARGB1555ToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToYMatrixRow = ARGB1555ToYMatrixRow_NEON; - } - } -#endif - - if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_argb1555 = src_argb1555 + (ptrdiff_t)(height - 1) * src_stride_argb1555; - src_stride_argb1555 = -src_stride_argb1555; - } - - for (y = 0; y < height - 1; y += 2) { - ARGB1555ToUVMatrixRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, - width, &kArgbI601Constants); - ARGB1555ToYMatrixRow(src_argb1555, dst_y, width, &kArgbI601Constants); - ARGB1555ToYMatrixRow(src_argb1555 + src_stride_argb1555, - dst_y + dst_stride_y, width, &kArgbI601Constants); - src_argb1555 += src_stride_argb1555 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - ARGB1555ToYMatrixRow(src_argb1555, dst_y, width, &kArgbI601Constants); - ARGB1555ToUVMatrixRow(src_argb1555, 0, dst_u, dst_v, width, - &kArgbI601Constants); - } - return 0; + return RGBToI420Matrix(src_argb1555, src_stride_argb1555, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, width, height, + &kArgbI601Constants, ARGB1555ToARGBRow); } + // Convert ARGB4444 to I420. 
LIBYUV_API int ARGB4444ToI420(const uint8_t* src_argb4444, @@ -3804,90 +3830,62 @@ int ARGB4444ToI420(const uint8_t* src_argb4444, int dst_stride_v, int width, int height) { - int y; - void (*ARGB4444ToUVMatrixRow)( - const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, - uint8_t* dst_v, int width, const struct ArgbConstants* c) = - ARGB4444ToUVMatrixRow_C; - void (*ARGB4444ToYMatrixRow)(const uint8_t* src_argb4444, uint8_t* dst_y, - int width, const struct ArgbConstants* c) = - ARGB4444ToYMatrixRow_C; - -#if defined(HAS_ARGB4444TOYMATRIXROW_AVX2) + void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = ARGB4444ToARGBRow_C; +#if defined(HAS_ARGB4444TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; + } + } +#endif +#if defined(HAS_ARGB4444TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGB4444ToYMatrixRow = ARGB4444ToYMatrixRow_Any_AVX2; + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_ARGB4444TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_ARGB4444TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - ARGB4444ToYMatrixRow = ARGB4444ToYMatrixRow_AVX2; + ARGB4444ToARGBRow = ARGB4444ToARGBRow_LASX; } } #endif -#if defined(HAS_ARGB4444TOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGB4444ToUVMatrixRow = ARGB4444ToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToUVMatrixRow = ARGB4444ToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGB4444TOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGB4444ToUVMatrixRow = 
ARGB4444ToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToUVMatrixRow = ARGB4444ToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGB4444TOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGB4444ToYMatrixRow = ARGB4444ToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToYMatrixRow = ARGB4444ToYMatrixRow_NEON; - } - } -#endif - - if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb4444 = src_argb4444 + (ptrdiff_t)(height - 1) * src_stride_argb4444; - src_stride_argb4444 = -src_stride_argb4444; - } - - for (y = 0; y < height - 1; y += 2) { - ARGB4444ToUVMatrixRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, - width, &kArgbI601Constants); - ARGB4444ToYMatrixRow(src_argb4444, dst_y, width, &kArgbI601Constants); - ARGB4444ToYMatrixRow(src_argb4444 + src_stride_argb4444, - dst_y + dst_stride_y, width, &kArgbI601Constants); - src_argb4444 += src_stride_argb4444 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - ARGB4444ToYMatrixRow(src_argb4444, dst_y, width, &kArgbI601Constants); - ARGB4444ToUVMatrixRow(src_argb4444, 0, dst_u, dst_v, width, - &kArgbI601Constants); - } - return 0; + return RGBToI420Matrix(src_argb4444, src_stride_argb4444, + dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, + dst_stride_v, width, height, + &kArgbI601Constants, ARGB4444ToARGBRow); } -// Convert RGB24 to J400. 
-LIBYUV_API -int RGB24ToJ400(const uint8_t* src_rgb24, - int src_stride_rgb24, - uint8_t* dst_yj, - int dst_stride_yj, - int width, - int height) { + + + +static int RGBToI400Matrix(const uint8_t* src_rgb, + int src_stride_rgb, + uint8_t* dst_y, + int dst_stride_y, + int width, + int height, + const struct ArgbConstants* argbconstants, + void (*RGBToARGBRow)(const uint8_t* src_rgb, + uint8_t* dst_argb, + int width)) { int y; - void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RGB24ToARGBRow_C; void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; @@ -3950,79 +3948,16 @@ int RGB24ToJ400(const uint8_t* src_rgb24, } #endif - if (!src_rgb24 || !dst_yj || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_rgb || !dst_y || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; - src_rgb24 = src_rgb24 + (ptrdiff_t)(height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; + src_rgb = src_rgb + (height - 1) * src_stride_rgb; + src_stride_rgb = -src_stride_rgb; } - // Coalesce rows. 
- if (src_stride_rgb24 == width * 3 && dst_stride_yj == width && - (ptrdiff_t)width * height <= INT_MAX) { - width *= height; - height = 1; - src_stride_rgb24 = dst_stride_yj = 0; - } -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - RGB24ToARGBRow = RGB24ToARGBRow_SVE2; - } -#endif -#if defined(HAS_RGB24TOARGBROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_LSX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - RGB24ToARGBRow = RGB24ToARGBRow_LASX; - } - } -#endif -#if defined(HAS_RGB24TOARGBROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - RGB24ToARGBRow = RGB24ToARGBRow_RVV; - } -#endif + { // Allocate 1 row of ARGB. 
const int row_size = (width * 4 + 31) & ~31; @@ -4031,10 +3966,10 @@ int RGB24ToJ400(const uint8_t* src_rgb24, return 1; for (y = 0; y < height; ++y) { - RGB24ToARGBRow(src_rgb24, row, width); - ARGBToYMatrixRow(row, dst_yj, width, &kArgbJPEGConstants); - src_rgb24 += src_stride_rgb24; - dst_yj += dst_stride_yj; + RGBToARGBRow(src_rgb, row, width); + ARGBToYMatrixRow(row, dst_y, width, argbconstants); + src_rgb += src_stride_rgb; + dst_y += dst_stride_y; } free_aligned_buffer_64(row); } @@ -4049,89 +3984,8 @@ int RAWToJ400(const uint8_t* src_raw, int dst_stride_yj, int width, int height) { - int y; - void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = - RAWToARGBRow_C; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if 
(TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif - - if (!src_raw || !dst_yj || width <= 0 || height == 0 || height == INT_MIN) { - return -1; - } - if (height < 0) { - height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - // Coalesce rows. - if (src_stride_raw == width * 3 && dst_stride_yj == width && - (ptrdiff_t)width * height <= INT_MAX) { - width *= height; - height = 1; - src_stride_raw = dst_stride_yj = 0; - } - + void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; @@ -4190,23 +4044,9 @@ int RAWToJ400(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif - - { - // Allocate 1 row of ARGB. - const int row_size = (width * 4 + 31) & ~31; - align_buffer_64(row, row_size); - if (!row) - return 1; - - for (y = 0; y < height; ++y) { - RAWToARGBRow(src_raw, row, width); - ARGBToYMatrixRow(row, dst_yj, width, &kArgbJPEGConstants); - src_raw += src_stride_raw; - dst_yj += dst_stride_yj; - } - free_aligned_buffer_64(row); - } - return 0; + return RGBToI400Matrix(src_raw, src_stride_raw, + dst_yj, dst_stride_yj, width, height, + &kArgbJPEGConstants, RAWToARGBRow); } // Convert Android420 to I420. 
@@ -4246,19 +4086,18 @@ static int Biplanar16bitTo8bit(const uint16_t* src_y, int subsample_x, int subsample_y, int depth) { - if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); int scale = 1 << (24 - depth); + if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) { + return -1; + } // Negative height means invert the image. if (height < 0) { height = -height; uv_height = -uv_height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_uv = src_uv + (ptrdiff_t)(uv_height - 1) * src_stride_uv; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (uv_height - 1) * src_stride_uv; src_stride_y = -src_stride_y; src_stride_uv = -src_stride_uv; } @@ -4313,19 +4152,19 @@ static int Planar8bitTo8bit(const uint8_t* src_y, int bias_y, int scale_uv, int bias_uv) { - if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } int uv_width = SUBSAMPLE(width, subsample_x, subsample_x); int uv_height = SUBSAMPLE(height, subsample_y, subsample_y); + if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || + height == 0) { + return -1; + } // Negative height means invert the image. 
if (height < 0) { height = -height; uv_height = -uv_height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(uv_height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(uv_height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (uv_height - 1) * src_stride_u; + src_v = src_v + (uv_height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -4365,7 +4204,82 @@ int J420ToI420(const uint8_t* src_y, 1, 220, 16, 225, 16); } +LIBYUV_API +int RGB24ToJ400(const uint8_t* src_rgb24, + int src_stride_rgb24, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height) { + void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, + int width) = RGB24ToARGBRow_C; +#if defined(HAS_RGB24TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + RGB24ToARGBRow = RGB24ToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; + if (IS_ALIGNED(width, 8)) { + RGB24ToARGBRow = RGB24ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + RGB24ToARGBRow = RGB24ToARGBRow_SVE2; + } +#endif +#if defined(HAS_RGB24TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + RGB24ToARGBRow = RGB24ToARGBRow_LSX; + } + } +#endif +#if 
defined(HAS_RGB24TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + RGB24ToARGBRow = RGB24ToARGBRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + RGB24ToARGBRow = RGB24ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_RGB24TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + RGB24ToARGBRow = RGB24ToARGBRow_RVV; + } +#endif + return RGBToI400Matrix(src_rgb24, src_stride_rgb24, + dst_yj, dst_stride_yj, width, height, + &kArgbJPEGConstants, RGB24ToARGBRow); +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif + + + diff --git a/source/convert_argb.cc b/source/convert_argb.cc index 3844e9691..7672a6692 100644 --- a/source/convert_argb.cc +++ b/source/convert_argb.cc @@ -11,7 +11,6 @@ #include "libyuv/convert_argb.h" #include -#include #include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" @@ -34,14 +33,13 @@ int ARGBCopy(const uint8_t* src_argb, int dst_stride_argb, int width, int height) { - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } @@ -69,14 +67,13 @@ int I420ToARGBMatrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I422TOARGBROW_SSSE3) @@ -319,20 +316,18 @@ int I422ToARGBMatrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_y == width && src_stride_u * 2 == width && - src_stride_v * 2 == width && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + src_stride_v * 2 == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; @@ -575,19 +570,18 @@ int I444ToARGBMatrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. 
if (src_stride_y == width && src_stride_u == width && src_stride_v == width && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; @@ -813,19 +807,18 @@ int I444ToRGB24Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I444ToRGB24Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } // Coalesce rows. if (src_stride_y == width && src_stride_u == width && src_stride_v == width && - dst_stride_rgb24 == width * 3 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_rgb24 == width * 3) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_rgb24 = 0; @@ -945,14 +938,13 @@ int I010ToAR30Matrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I210TOAR30ROW_NEON) @@ -1124,14 +1116,13 @@ int I012ToAR30Matrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I212ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I212TOAR30ROW_SSSE3) @@ -1201,14 +1192,13 @@ int I210ToAR30Matrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I210TOAR30ROW_NEON) @@ -1375,14 +1365,13 @@ int I410ToAR30Matrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I410ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I410TOAR30ROW_NEON) @@ -1448,14 +1437,13 @@ int I010ToARGBMatrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I210TOARGBROW_SSSE3) @@ -1631,14 +1619,13 @@ int I012ToARGBMatrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I212ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I212TOARGBROW_SSSE3) @@ -1706,14 +1693,13 @@ int I210ToARGBMatrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I210TOARGBROW_SSSE3) @@ -1886,14 +1872,13 @@ int I410ToARGBMatrix(const uint16_t* src_y, const struct YuvConstants* yuvconstants, int width) = I410ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I410TOARGBROW_SSSE3) @@ -1955,14 +1940,13 @@ int P010ToARGBMatrix(const uint16_t* src_y, const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_P210TOARGBROW_SSSE3) @@ -2025,14 +2009,13 @@ int P210ToARGBMatrix(const uint16_t* src_y, const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_P210TOARGBROW_SSSE3) @@ -2093,14 +2076,13 @@ int P010ToAR30Matrix(const uint16_t* src_y, const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_P210TOAR30ROW_SSSE3) @@ -2163,14 +2145,13 @@ int P210ToAR30Matrix(const uint16_t* src_y, const uint16_t* y_buf, const uint16_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = P210ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_P210TOAR30ROW_SSSE3) @@ -2242,13 +2223,13 @@ int I420AlphaToARGBMatrix(const uint8_t* src_y, int width) = ARGBAttenuateRow_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I422ALPHATOARGBROW_SSSE3) @@ -2395,13 +2376,13 @@ int I422AlphaToARGBMatrix(const uint8_t* src_y, int width) = ARGBAttenuateRow_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I422ALPHATOARGBROW_SSSE3) @@ -2546,13 +2527,13 @@ int I444AlphaToARGBMatrix(const uint8_t* src_y, int width) = ARGBAttenuateRow_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I444ALPHATOARGBROW_SSSE3) @@ -2810,13 +2791,13 @@ int I010AlphaToARGBMatrix(const uint16_t* src_y, int width) = ARGBAttenuateRow_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I210ALPHATOARGBROW_NEON) @@ -2942,13 +2923,13 @@ int I210AlphaToARGBMatrix(const uint16_t* src_y, int width) = ARGBAttenuateRow_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I210ALPHATOARGBROW_NEON) @@ -3072,13 +3053,13 @@ int I410AlphaToARGBMatrix(const uint16_t* src_y, int width) = ARGBAttenuateRow_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I410ALPHATOARGBROW_NEON) @@ -3190,18 +3171,17 @@ int I400ToARGBMatrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I400ToARGBRow_C; assert(yuvconstants); - if (!src_y || !dst_argb || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. 
- if (src_stride_y == width && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; @@ -3285,23 +3265,29 @@ int J400ToARGB(const uint8_t* src_y, int y; void (*J400ToARGBRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = J400ToARGBRow_C; - if (!src_y || !dst_argb || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; } - +#if defined(HAS_J400TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + J400ToARGBRow = J400ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + J400ToARGBRow = J400ToARGBRow_SSE2; + } + } +#endif #if defined(HAS_J400TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { J400ToARGBRow = J400ToARGBRow_Any_AVX2; @@ -3310,14 +3296,6 @@ int J400ToARGB(const uint8_t* src_y, } } #endif -#if defined(HAS_J400TOARGBROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - J400ToARGBRow = J400ToARGBRow_Any_AVX512BW; - if (IS_ALIGNED(width, 32)) { - J400ToARGBRow = J400ToARGBRow_AVX512BW; - } - } -#endif #if defined(HAS_J400TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { J400ToARGBRow = J400ToARGBRow_Any_NEON; @@ -3460,19 +3438,17 @@ int ARGBToBGRA(const uint8_t* src_argb, int y; void (*ARGBToBGRARow)(const uint8_t* src_argb, uint8_t* dst_bgra, int width) = ARGBToBGRARow_C; - if (!src_argb || !dst_bgra || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_bgra || 
width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_bgra == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_bgra = 0; @@ -3503,19 +3479,17 @@ int ARGBToABGR(const uint8_t* src_argb, int y; void (*ARGBToABGRRow)(const uint8_t* src_argb, uint8_t* dst_abgr, int width) = ARGBToABGRRow_C; - if (!src_argb || !dst_abgr || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_abgr || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_abgr == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_abgr = 0; @@ -3558,19 +3532,17 @@ int RGBAToARGB(const uint8_t* src_rgba, int y; void (*RGBAToARGBRow)(const uint8_t* src_rgba, uint8_t* dst_argb, int width) = RGBAToARGBRow_C; - if (!src_rgba || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_rgba || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_rgba = src_rgba + (ptrdiff_t)(height - 1) * src_stride_rgba; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } // Coalesce rows. 
- if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_rgba == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgba = dst_stride_argb = 0; @@ -3601,19 +3573,17 @@ int AR64ToAB64(const uint16_t* src_ar64, int y; void (*AR64ToAB64Row)(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) = AR64ToAB64Row_C; - if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ar64 || !dst_ab64 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ar64 = src_ar64 + (ptrdiff_t)(height - 1) * src_stride_ar64; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; src_stride_ar64 = -src_stride_ar64; } // Coalesce rows. - if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ar64 == width * 4 && dst_stride_ab64 == width * 4) { width *= height; height = 1; src_stride_ar64 = dst_stride_ab64 = 0; @@ -3645,19 +3615,17 @@ int RGB24ToARGB(const uint8_t* src_rgb24, int y; void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; - if (!src_rgb24 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_rgb24 = src_rgb24 + (ptrdiff_t)(height - 1) * src_stride_rgb24; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } // Coalesce rows. 
- if (src_stride_rgb24 == width * 3 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_rgb24 == width * 3 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgb24 = dst_stride_argb = 0; @@ -3720,7 +3688,7 @@ int RGB24ToARGB(const uint8_t* src_rgb24, RGB24ToARGBRow = RGB24ToARGBRow_RVV; } #endif - for (y = 0; y < height; ++y) { +for (y = 0; y < height; ++y) { RGB24ToARGBRow(src_rgb24, dst_argb, width); src_rgb24 += src_stride_rgb24; dst_argb += dst_stride_argb; @@ -3739,18 +3707,17 @@ int RAWToARGB(const uint8_t* src_raw, int y; void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; - if (!src_raw || !dst_argb || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_raw || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; + src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } // Coalesce rows. - if (src_stride_raw == width * 3 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_raw == width * 3 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_raw = dst_stride_argb = 0; @@ -3833,18 +3800,17 @@ int RAWToRGBA(const uint8_t* src_raw, int y; void (*RAWToRGBARow)(const uint8_t* src_rgb, uint8_t* dst_rgba, int width) = RAWToRGBARow_C; - if (!src_raw || !dst_rgba || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_raw || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; + src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } // Coalesce rows. 
- if (src_stride_raw == width * 3 && dst_stride_rgba == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_raw == width * 3 && dst_stride_rgba == width * 4) { width *= height; height = 1; src_stride_raw = dst_stride_rgba = 0; @@ -3895,24 +3861,29 @@ int RGB565ToARGB(const uint8_t* src_rgb565, int y; void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) = RGB565ToARGBRow_C; - if (!src_rgb565 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_rgb565 = src_rgb565 + (ptrdiff_t)(height - 1) * src_stride_rgb565; + src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; src_stride_rgb565 = -src_stride_rgb565; } // Coalesce rows. - if (src_stride_rgb565 == width * 2 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_rgb565 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgb565 = dst_stride_argb = 0; } - +#if defined(HAS_RGB565TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; + } + } +#endif #if defined(HAS_RGB565TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2; @@ -3965,24 +3936,29 @@ int ARGB1555ToARGB(const uint8_t* src_argb1555, int y; void (*ARGB1555ToARGBRow)(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) = ARGB1555ToARGBRow_C; - if (!src_argb1555 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb1555 = src_argb1555 + (ptrdiff_t)(height - 1) * src_stride_argb1555; + src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; src_stride_argb1555 = -src_stride_argb1555; } // Coalesce rows. - if (src_stride_argb1555 == width * 2 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb1555 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb1555 = dst_stride_argb = 0; } - +#if defined(HAS_ARGB1555TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; + } + } +#endif #if defined(HAS_ARGB1555TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2; @@ -4040,24 +4016,29 @@ int ARGB4444ToARGB(const uint8_t* src_argb4444, int y; void (*ARGB4444ToARGBRow)(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) = ARGB4444ToARGBRow_C; - if (!src_argb4444 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb4444 = src_argb4444 + (ptrdiff_t)(height - 1) * src_stride_argb4444; + src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; src_stride_argb4444 = -src_stride_argb4444; } // Coalesce rows. 
- if (src_stride_argb4444 == width * 2 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb4444 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb4444 = dst_stride_argb = 0; } - +#if defined(HAS_ARGB4444TOARGBROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; + if (IS_ALIGNED(width, 8)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; + } + } +#endif #if defined(HAS_ARGB4444TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2; @@ -4108,19 +4089,17 @@ int AR30ToARGB(const uint8_t* src_ar30, int width, int height) { int y; - if (!src_ar30 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ar30 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ar30 = src_ar30 + (ptrdiff_t)(height - 1) * src_stride_ar30; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; src_stride_ar30 = -src_stride_ar30; } // Coalesce rows. - if (src_stride_ar30 == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ar30 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_ar30 = dst_stride_argb = 0; @@ -4142,19 +4121,17 @@ int AR30ToABGR(const uint8_t* src_ar30, int width, int height) { int y; - if (!src_ar30 || !dst_abgr || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ar30 || !dst_abgr || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ar30 = src_ar30 + (ptrdiff_t)(height - 1) * src_stride_ar30; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; src_stride_ar30 = -src_stride_ar30; } // Coalesce rows. 
- if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4) { width *= height; height = 1; src_stride_ar30 = dst_stride_abgr = 0; @@ -4176,19 +4153,17 @@ int AR30ToAB30(const uint8_t* src_ar30, int width, int height) { int y; - if (!src_ar30 || !dst_ab30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ar30 || !dst_ab30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ar30 = src_ar30 + (ptrdiff_t)(height - 1) * src_stride_ar30; + src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; src_stride_ar30 = -src_stride_ar30; } // Coalesce rows. - if (src_stride_ar30 == width * 4 && dst_stride_ab30 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ar30 == width * 4 && dst_stride_ab30 == width * 4) { width *= height; height = 1; src_stride_ar30 = dst_stride_ab30 = 0; @@ -4212,19 +4187,17 @@ int AR64ToARGB(const uint16_t* src_ar64, int y; void (*AR64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb, int width) = AR64ToARGBRow_C; - if (!src_ar64 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ar64 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ar64 = src_ar64 + (ptrdiff_t)(height - 1) * src_stride_ar64; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; src_stride_ar64 = -src_stride_ar64; } // Coalesce rows. 
- if (src_stride_ar64 == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ar64 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_ar64 = dst_stride_argb = 0; @@ -4278,19 +4251,17 @@ int AB64ToARGB(const uint16_t* src_ab64, int y; void (*AB64ToARGBRow)(const uint16_t* src_ar64, uint8_t* dst_argb, int width) = AB64ToARGBRow_C; - if (!src_ab64 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ab64 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ab64 = src_ab64 + (ptrdiff_t)(height - 1) * src_stride_ab64; + src_ab64 = src_ab64 + (height - 1) * src_stride_ab64; src_stride_ab64 = -src_stride_ab64; } // Coalesce rows. - if (src_stride_ab64 == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ab64 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_ab64 = dst_stride_argb = 0; @@ -4349,14 +4320,13 @@ int NV12ToARGBMatrix(const uint8_t* src_y, const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_NV12TOARGBROW_SSSE3) @@ -4442,14 +4412,13 @@ int NV21ToARGBMatrix(const uint8_t* src_y, const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; assert(yuvconstants); - if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_NV21TOARGBROW_SSSE3) @@ -4594,14 +4563,13 @@ int NV12ToRGB24Matrix(const uint8_t* src_y, const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_NV12TORGB24ROW_NEON) @@ -4671,14 +4639,13 @@ int NV21ToRGB24Matrix(const uint8_t* src_y, const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; assert(yuvconstants); - if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_NV21TORGB24ROW_NEON) @@ -4802,14 +4769,13 @@ int NV21ToYUV24(const uint8_t* src_y, int y; void (*NV21ToYUV24Row)(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_yuv24, int width) = NV21ToYUV24Row_C; - if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_vu || !dst_yuv24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_yuv24 = dst_yuv24 + (ptrdiff_t)(height - 1) * dst_stride_yuv24; + dst_yuv24 = dst_yuv24 + (height - 1) * dst_stride_yuv24; dst_stride_yuv24 = -dst_stride_yuv24; } #if defined(HAS_NV21TOYUV24ROW_NEON) @@ -4860,19 +4826,17 @@ int YUY2ToARGBMatrix(const uint8_t* src_yuy2, void (*YUY2ToARGBRow)(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = YUY2ToARGBRow_C; - if (!src_yuy2 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_yuy2 = src_yuy2 + (ptrdiff_t)(height - 1) * src_stride_yuy2; + src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. - if (src_stride_yuy2 == width * 2 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_yuy2 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_yuy2 = dst_stride_argb = 0; @@ -4952,19 +4916,17 @@ int UYVYToARGBMatrix(const uint8_t* src_uyvy, void (*UYVYToARGBRow)(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = UYVYToARGBRow_C; - if (!src_uyvy || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_uyvy || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uyvy = src_uyvy + (ptrdiff_t)(height - 1) * src_stride_uyvy; + src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } // Coalesce rows. - if (src_stride_uyvy == width * 2 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_uyvy == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_uyvy = dst_stride_argb = 0; @@ -5066,15 +5028,14 @@ int Android420ToARGBMatrix(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } @@ -5097,10 +5058,7 @@ int Android420ToARGBMatrix(const uint8_t* src_y, } // General case fallback creates NV12 - const uint64_t uv_size = (uint64_t)halfwidth * 2 * halfheight; - if (uv_size > SIZE_MAX) - return 1; - align_buffer_64(plane_uv, (size_t)uv_size); + align_buffer_64(plane_uv, halfwidth * 2 * halfheight); if (!plane_uv) return 1; dst_uv = plane_uv; @@ -5173,14 +5131,13 @@ int I422ToRGBAMatrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgba = dst_rgba + (ptrdiff_t)(height - 1) * dst_stride_rgba; + dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; dst_stride_rgba = -dst_stride_rgba; } #if defined(HAS_I422TORGBAROW_SSSE3) @@ -5301,14 +5258,13 @@ int NV12ToRGB565Matrix(const uint8_t* src_y, const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_rgb565 = dst_rgb565 + (ptrdiff_t)(height - 1) * dst_stride_rgb565; + dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } #if defined(HAS_NV12TORGB565ROW_SSSE3) @@ -5397,14 +5353,13 @@ int I420ToRGBAMatrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgba = dst_rgba + (ptrdiff_t)(height - 1) * dst_stride_rgba; + dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; dst_stride_rgba = -dst_stride_rgba; } #if defined(HAS_I422TORGBAROW_SSSE3) @@ -5530,14 +5485,13 @@ int I420ToRGB24Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_I422TORGB24ROW_SSSE3) @@ -5556,22 +5510,6 @@ int I420ToRGB24Matrix(const uint8_t* src_y, } } #endif -#if defined(HAS_I422TORGB24ROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - I422ToRGB24Row = I422ToRGB24Row_Any_AVX512BW; - if (IS_ALIGNED(width, 32)) { - I422ToRGB24Row = I422ToRGB24Row_AVX512BW; - } - } -#endif -#if defined(HAS_I422TORGB24ROW_AVX512VBMI) - if (TestCpuFlag(kCpuHasAVX512VBMI)) { - I422ToRGB24Row = I422ToRGB24Row_Any_AVX512VBMI; - if (IS_ALIGNED(width, 32)) { - I422ToRGB24Row = I422ToRGB24Row_AVX512VBMI; - } - } -#endif #if defined(HAS_I422TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGB24Row = I422ToRGB24Row_Any_NEON; @@ -5751,14 +5689,13 @@ int I422ToRGB24Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_I422TORGB24ROW_SSSE3) @@ -5777,22 +5714,6 @@ int I422ToRGB24Matrix(const uint8_t* src_y, } } #endif -#if defined(HAS_I422TORGB24ROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - I422ToRGB24Row = I422ToRGB24Row_Any_AVX512BW; - if (IS_ALIGNED(width, 32)) { - I422ToRGB24Row = I422ToRGB24Row_AVX512BW; - } - } -#endif -#if defined(HAS_I422TORGB24ROW_AVX512VBMI) - if (TestCpuFlag(kCpuHasAVX512VBMI)) { - I422ToRGB24Row = I422ToRGB24Row_Any_AVX512VBMI; - if (IS_ALIGNED(width, 32)) { - I422ToRGB24Row = I422ToRGB24Row_AVX512VBMI; - } - } -#endif #if defined(HAS_I422TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGB24Row = I422ToRGB24Row_Any_NEON; @@ -5897,13 +5818,13 @@ int I420ToARGB1555(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToARGB1555Row_C; if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb1555 = dst_argb1555 + (ptrdiff_t)(height - 1) * dst_stride_argb1555; + dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555; dst_stride_argb1555 = -dst_stride_argb1555; } #if defined(HAS_I422TOARGB1555ROW_SSSE3) @@ -5988,13 +5909,13 @@ int I420ToARGB4444(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToARGB4444Row_C; if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb4444 = dst_argb4444 + (ptrdiff_t)(height - 1) * dst_stride_argb4444; + dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444; dst_stride_argb4444 = -dst_stride_argb4444; } #if defined(HAS_I422TOARGB4444ROW_SSSE3) @@ -6080,14 +6001,13 @@ int I420ToRGB565Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgb565 = dst_rgb565 + (ptrdiff_t)(height - 1) * dst_stride_rgb565; + dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } #if defined(HAS_I422TORGB565ROW_SSSE3) @@ -6223,14 +6143,13 @@ int I422ToRGB565Matrix(const uint8_t* src_y, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_rgb565 = dst_rgb565 + (ptrdiff_t)(height - 1) * dst_stride_rgb565; + dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } #if defined(HAS_I422TORGB565ROW_SSSE3) @@ -6337,14 +6256,13 @@ int I420ToRGB565Dither(const uint8_t* src_y, void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; - if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgb565 = dst_rgb565 + (ptrdiff_t)(height - 1) * dst_stride_rgb565; + dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } if (!dither4x4) { @@ -6501,14 +6419,13 @@ int I420ToAR30Matrix(const uint8_t* src_y, I422ToAR30Row_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } @@ -6649,14 +6566,13 @@ static int I420ToARGBMatrixBilinear(const uint8_t* src_y, void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I444TOARGBROW_SSSE3) @@ -6799,14 +6715,13 @@ static int I422ToARGBMatrixLinear(const uint8_t* src_y, void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I444TOARGBROW_SSSE3) @@ -6926,14 +6841,13 @@ static int I420ToRGB24MatrixBilinear(const uint8_t* src_y, void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_I444TORGB24ROW_SSSE3) @@ -7079,14 +6993,13 @@ static int I010ToAR30MatrixBilinear(const uint16_t* src_y, void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I410TOAR30ROW_NEON) @@ -7204,14 +7117,13 @@ static int I210ToAR30MatrixLinear(const uint16_t* src_y, void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I410TOAR30ROW_NEON) @@ -7308,14 +7220,13 @@ static int I010ToARGBMatrixBilinear(const uint16_t* src_y, void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I410TOARGBROW_SSSE3) @@ -7432,14 +7343,13 @@ static int I210ToARGBMatrixLinear(const uint16_t* src_y, void (*ScaleRowUp2_Linear_12)(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I410TOARGBROW_SSSE3) @@ -7545,13 +7455,13 @@ static int I420AlphaToARGBMatrixBilinear( int dst_width) = ScaleRowUp2_Linear_Any_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I444ALPHATOARGBROW_SSSE3) @@ -7765,13 +7675,13 @@ static int I422AlphaToARGBMatrixLinear(const uint8_t* src_y, int dst_width) = ScaleRowUp2_Linear_Any_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I444ALPHATOARGBROW_SSSE3) @@ -7950,13 +7860,13 @@ static int I010AlphaToARGBMatrixBilinear( int dst_width) = ScaleRowUp2_Linear_16_Any_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I410ALPHATOARGBROW_NEON) @@ -8144,13 +8054,13 @@ static int I210AlphaToARGBMatrixLinear(const uint16_t* src_y, int dst_width) = ScaleRowUp2_Linear_16_Any_C; assert(yuvconstants); if (!src_y || !src_u || !src_v || !src_a || !dst_argb || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I410ALPHATOARGBROW_NEON) @@ -8292,14 +8202,13 @@ static int P010ToARGBMatrixBilinear(const uint16_t* src_y, const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_P410TOARGBROW_SSSE3) @@ -8404,14 +8313,13 @@ static int P210ToARGBMatrixLinear(const uint16_t* src_y, void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_P410TOARGBROW_SSSE3) @@ -8502,14 +8410,13 @@ static int P010ToAR30MatrixBilinear(const uint16_t* src_y, const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) = ScaleUVRowUp2_Bilinear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_P410TOAR30ROW_SSSE3) @@ -8614,14 +8521,13 @@ static int P210ToAR30MatrixLinear(const uint16_t* src_y, void (*ScaleRowUp2_Linear)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = ScaleUVRowUp2_Linear_16_Any_C; assert(yuvconstants); - if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_uv || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_P410TOAR30ROW_SSSE3) @@ -8714,14 +8620,13 @@ static int I422ToRGB24MatrixLinear(const uint8_t* src_y, void (*ScaleRowUp2_Linear)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = ScaleRowUp2_Linear_Any_C; assert(yuvconstants); - if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgb24 = dst_rgb24 + (ptrdiff_t)(height - 1) * dst_stride_rgb24; + dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_I444TORGB24ROW_SSSE3) diff --git a/source/convert_from.cc b/source/convert_from.cc index 40ca02190..5cf88fa2d 100644 --- a/source/convert_from.cc +++ b/source/convert_from.cc @@ -10,8 +10,6 @@ #include "libyuv/convert_from.h" -#include - #include "libyuv/basic_types.h" #include "libyuv/convert.h" // For I420Copy #include "libyuv/cpu_id.h" @@ -89,16 +87,16 @@ int I420ToI010(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -134,16 +132,16 @@ int I420ToI012(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -228,7 +226,7 @@ int I010ToI410(const uint16_t* src_y, int height) { int r; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } @@ -265,7 +263,7 @@ int I210ToI410(const uint16_t* src_y, int height) { int r; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } @@ -301,7 +299,7 @@ int I422ToI444(const uint8_t* src_y, int height) { int r; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } @@ -326,7 +324,7 @@ int 
I400Copy(const uint8_t* src_y, int dst_stride_y, int width, int height) { - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); @@ -348,20 +346,18 @@ int I422ToYUY2(const uint8_t* src_y, void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; - if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_yuy2 = dst_yuy2 + (ptrdiff_t)(height - 1) * dst_stride_yuy2; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } // Coalesce rows. if (src_stride_y == width && src_stride_u * 2 == width && - src_stride_v * 2 == width && dst_stride_yuy2 == width * 2 && - (ptrdiff_t)width * height <= INT_MAX) { + src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0; @@ -416,14 +412,13 @@ int I420ToYUY2(const uint8_t* src_y, void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; - if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_yuy2 = dst_yuy2 + (ptrdiff_t)(height - 1) * dst_stride_yuy2; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } #if defined(HAS_I422TOYUY2ROW_SSE2) @@ -497,20 +492,18 @@ int I422ToUYVY(const uint8_t* src_y, void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; - if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_uyvy = dst_uyvy + (ptrdiff_t)(height - 1) * dst_stride_uyvy; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } // Coalesce rows. if (src_stride_y == width && src_stride_u * 2 == width && - src_stride_v * 2 == width && dst_stride_uyvy == width * 2 && - (ptrdiff_t)width * height <= INT_MAX) { + src_stride_v * 2 == width && dst_stride_uyvy == width * 2) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0; @@ -581,14 +574,13 @@ int I420ToUYVY(const uint8_t* src_y, void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; - if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_uyvy = dst_uyvy + (ptrdiff_t)(height - 1) * dst_stride_uyvy; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } #if defined(HAS_I422TOUYVYROW_SSE2) @@ -663,16 +655,16 @@ int I420ToNV12(const uint8_t* src_y, int halfwidth = (width + 1) / 2; int halfheight = (height + 1) / 2; if ((!src_y && dst_y) || !src_u || !src_v || !dst_uv || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -718,8 +710,7 @@ int ConvertFromI420(const uint8_t* y, uint32_t fourcc) { uint32_t format = CanonicalFourCC(fourcc); int r = 0; - if (!y || !u || !v || !dst_sample || width <= 0 || height == 0 || - height == INT_MIN) { + if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) { return -1; } switch (format) { @@ -791,7 +782,7 @@ int ConvertFromI420(const uint8_t* y, break; case FOURCC_NV12: { int dst_y_stride = dst_sample_stride ? dst_sample_stride : width; - uint8_t* dst_uv = dst_sample + (ptrdiff_t)dst_y_stride * height; + uint8_t* dst_uv = dst_sample + dst_y_stride * height; r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_uv, dst_sample_stride ? dst_sample_stride : width, width, @@ -800,7 +791,7 @@ int ConvertFromI420(const uint8_t* y, } case FOURCC_NV21: { int dst_y_stride = dst_sample_stride ? 
dst_sample_stride : width; - uint8_t* dst_vu = dst_sample + (ptrdiff_t)dst_y_stride * height; + uint8_t* dst_vu = dst_sample + dst_y_stride * height; r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_vu, dst_sample_stride ? dst_sample_stride : width, width, @@ -816,11 +807,11 @@ int ConvertFromI420(const uint8_t* y, uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV12) { - dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height; - dst_u = dst_v + (ptrdiff_t)halfstride * halfheight; + dst_v = dst_sample + dst_sample_stride * height; + dst_u = dst_v + halfstride * halfheight; } else { - dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height; - dst_v = dst_u + (ptrdiff_t)halfstride * halfheight; + dst_u = dst_sample + dst_sample_stride * height; + dst_v = dst_u + halfstride * halfheight; } r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, halfstride, dst_v, halfstride, @@ -834,11 +825,11 @@ int ConvertFromI420(const uint8_t* y, uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV16) { - dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height; - dst_u = dst_v + (ptrdiff_t)halfstride * height; + dst_v = dst_sample + dst_sample_stride * height; + dst_u = dst_v + halfstride * height; } else { - dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height; - dst_v = dst_u + (ptrdiff_t)halfstride * height; + dst_u = dst_sample + dst_sample_stride * height; + dst_v = dst_u + halfstride * height; } r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, halfstride, dst_v, halfstride, @@ -851,11 +842,11 @@ int ConvertFromI420(const uint8_t* y, uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV24) { - dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height; - dst_u = dst_v + (ptrdiff_t)dst_sample_stride * height; + dst_v = dst_sample + dst_sample_stride * height; + dst_u = dst_v + dst_sample_stride * height; } 
else { - dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height; - dst_v = dst_u + (ptrdiff_t)dst_sample_stride * height; + dst_u = dst_sample + dst_sample_stride * height; + dst_v = dst_u + dst_sample_stride * height; } r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, dst_sample_stride, dst_v, diff --git a/source/convert_from_argb.cc b/source/convert_from_argb.cc index 77b3851d4..a139c1d20 100644 --- a/source/convert_from_argb.cc +++ b/source/convert_from_argb.cc @@ -10,8 +10,6 @@ #include "libyuv/convert_from_argb.h" -#include - #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" @@ -54,9 +52,10 @@ int ARGBToI444Matrix(const uint8_t* src_argb, int y; void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - void (*ARGBToUV444MatrixRow)( - const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = ARGBToUV444MatrixRow_C; + void (*ARGBToUV444MatrixRow)(const uint8_t* src_argb, uint8_t* dst_u, + uint8_t* dst_v, int width, + const struct ArgbConstants* c) = +ARGBToUV444MatrixRow_C; #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -153,13 +152,13 @@ int ARGBToI444Matrix(const uint8_t* src_argb, } #endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } @@ -209,7 +208,7 @@ int ARGBToI422Matrix(const uint8_t* src_argb, void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; +ARGBToUVMatrixRow_C; #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -273,40 +272,10 @@ int ARGBToI422Matrix(const uint8_t* src_argb, } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif #if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } @@ -326,15 +295,61 @@ int ARGBToI422Matrix(const uint8_t* src_argb, ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; } } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif 
+#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } #endif if (!src_argb || !dst_y || !dst_u || !dst_v || !argbconstants || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } @@ -358,9 +373,8 @@ int ARGBToNV12(const uint8_t* src_argb, int dst_stride_uv, int width, int height) { - return ARGBToNV12Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, - dst_uv, dst_stride_uv, &kArgbI601Constants, width, - height); + return ARGBToNV12Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_uv, + dst_stride_uv, &kArgbI601Constants, width, height); } LIBYUV_API @@ -380,7 +394,7 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; +ARGBToUVMatrixRow_C; #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { @@ -444,467 +458,14 @@ int ARGBToNV12Matrix(const uint8_t* src_argb, } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif #if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { + if (IS_ALIGNED(width, 16)) { 
ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif - void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, - uint8_t* dst_uv, int width) = MergeUVRow_C; - if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - MergeUVRow = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_SSE2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - MergeUVRow = MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_NEON; - } - } -#endif -#if defined(HAS_MERGEUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - MergeUVRow = MergeUVRow_SME; - } -#endif -#if defined(HAS_MERGEUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - MergeUVRow = MergeUVRow_Any_LSX; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_LSX; - } - } -#endif -#if defined(HAS_MERGEUVROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - MergeUVRow = MergeUVRow_RVV; - } -#endif - - // Allocate a rows of uv. 
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); - if (!row_u) - return 1; - - for (y = 0; y < height - 1; y += 2) { - ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, - argbconstants); - MergeUVRow(row_u, row_v, dst_uv, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, - argbconstants); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; - } - if (height & 1) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); - MergeUVRow(row_u, row_v, dst_uv, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - } - free_aligned_buffer_64(row_u); - return 0; -} - -int ARGBToNV21Matrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_uv, - const struct ArgbConstants* argbconstants, - int width, - int height) { - int y; - int halfwidth = (width + 1) >> 1; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; - -#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - 
ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_LASX) - if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; - } -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; - if (IS_ALIGNED(width, 16)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) - if (TestCpuFlag(kCpuHasSVE2)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - if (IS_ALIGNED(width, 2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif - void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, - uint8_t* dst_vu, int width) = MergeUVRow_C; - if (!src_argb || !dst_y || !dst_vu || !argbconstants || width <= 0 || - height == 0 || height == INT_MIN) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - MergeUVRow = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_SSE2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - MergeUVRow = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - MergeUVRow = MergeUVRow_Any_AVX512BW; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow = MergeUVRow_AVX512BW; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - MergeUVRow = MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_NEON; - } - } -#endif -#if defined(HAS_MERGEUVROW_SME) - if (TestCpuFlag(kCpuHasSME)) { - MergeUVRow = MergeUVRow_SME; - } -#endif -#if defined(HAS_MERGEUVROW_LSX) - if (TestCpuFlag(kCpuHasLSX)) { - MergeUVRow = MergeUVRow_Any_LSX; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow = MergeUVRow_LSX; - } - } -#endif -#if defined(HAS_MERGEUVROW_RVV) - if (TestCpuFlag(kCpuHasRVV)) { - MergeUVRow = MergeUVRow_RVV; - } -#endif - - // Allocate a rows of uv. 
- align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); - uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); - if (!row_u) - return 1; - - for (y = 0; y < height - 1; y += 2) { - ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, - argbconstants); - MergeUVRow(row_u, row_v, dst_vu, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, - argbconstants); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_vu += dst_stride_uv; - } - if (height & 1) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); - MergeUVRow(row_u, row_v, dst_vu, halfwidth); - ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); - } - free_aligned_buffer_64(row_u); - return 0; -} -LIBYUV_API -int ARGBToI400Matrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - const struct ArgbConstants* constants, - int width, - int height) { - int y; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - if (!src_argb || !dst_y || !constants || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - 
ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) - if (TestCpuFlag(kCpuHasNeonDotProd)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; - if (IS_ALIGNED(width, 16)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; - } - } -#endif - - for (y = 0; y < height; ++y) { - ARGBToYMatrixRow(src_argb, dst_y, width, constants); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - } - return 0; -} -LIBYUV_API -int ARGBToYUY2Matrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yuy2, - int dst_stride_yuy2, - const struct ArgbConstants* constants, - int width, - int height) { - int y; - void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, - uint8_t* dst_u, uint8_t* dst_v, int width, - const struct ArgbConstants* c) = - ARGBToUVMatrixRow_C; - void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, - const struct ArgbConstants* c) = ARGBToYMatrixRow_C; - void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, - const uint8_t* src_v, uint8_t* dst_yuy2, int width) = - I422ToYUY2Row_C; - - if (!src_argb || !dst_yuy2 || !constants || width <= 0 || height == 0 || - height == INT_MIN) { - return -1; - } - if (height < 0) { - height = -height; - dst_yuy2 = dst_yuy2 + (ptrdiff_t)(height - 1) * dst_stride_yuy2; - dst_stride_yuy2 = -dst_stride_yuy2; - } -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; - } - } -#endif -#if defined(HAS_ARGBTOYMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - 
ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; - } - } -#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -929,6 +490,975 @@ int ARGBToYUY2Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } +#endif + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_uv || !argbconstants || width <= 0 || + height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } +#if defined(HAS_MERGEUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeUVRow = MergeUVRow_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow = MergeUVRow_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_MERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow = MergeUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_NEON; + } + } +#endif +#if defined(HAS_MERGEUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + MergeUVRow = MergeUVRow_SME; + } +#endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif + + // Allocate a rows of uv. 
+ align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; + + for (y = 0; y < height - 1; y += 2) { + ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, + argbconstants); + MergeUVRow(row_u, row_v, dst_uv, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, + argbconstants); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, argbconstants); + MergeUVRow(row_u, row_v, dst_uv, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + } + free_aligned_buffer_64(row_u); + return 0; +} + +// Same as NV12 but U and V swapped. +LIBYUV_API +int ARGBToNV21(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + int y; + int halfwidth = (width + 1) >> 1; + void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUVMatrixRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_vu, int width) = MergeUVRow_C; + if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } +#endif +#if defined(HAS_MERGEUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeUVRow = MergeUVRow_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow = 
MergeUVRow_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_MERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow = MergeUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_NEON; + } + } +#endif +#if defined(HAS_MERGEUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + MergeUVRow = MergeUVRow_SME; + } +#endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif + { + // Allocate a rows of uv. + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; + + for (y = 0; y < height - 1; y += 2) { + ARGBToUVMatrixRow(src_argb, src_stride_argb, row_u, row_v, width, &kArgbI601Constants); + MergeUVRow(row_v, row_u, dst_vu, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, &kArgbI601Constants); + ARGBToYMatrixRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width, &kArgbI601Constants); + src_argb += src_stride_argb * 2; + dst_y += dst_stride_y * 2; + dst_vu += dst_stride_vu; + } + if (height & 1) { + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, &kArgbI601Constants); + MergeUVRow(row_v, row_u, dst_vu, halfwidth); + ARGBToYMatrixRow(src_argb, dst_y, width, &kArgbI601Constants); + } + free_aligned_buffer_64(row_u); + } + return 0; +} + +LIBYUV_API +int ABGRToNV12(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_uv, + int dst_stride_uv, + int width, + int height) { + int y; + int halfwidth = (width + 1) >> 1; + void 
(*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = + ABGRToYRow_C; + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_uv, int width) = MergeUVRow_C; + if (!src_abgr || !dst_y || !dst_uv || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYRow = ABGRToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYRow = ABGRToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToYRow = ABGRToYRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToYRow = ABGRToYRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVRow = ABGRToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToUVRow = ABGRToUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYRow = ABGRToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYROW_NEON_DOTPROD) + if 
(TestCpuFlag(kCpuHasNeonDotProd)) { + ABGRToYRow = ABGRToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVRow = ABGRToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ABGRToUVRow = ABGRToUVRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + ABGRToUVRow = ABGRToUVRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + ABGRToUVRow = ABGRToUVRow_SVE2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + ABGRToUVRow = ABGRToUVRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + ABGRToUVRow = ABGRToUVRow_SME; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif +#if defined(HAS_MERGEUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeUVRow = MergeUVRow_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow = MergeUVRow_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow = MergeUVRow_AVX512BW; + } + } +#endif +#if 
defined(HAS_MERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow = MergeUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_NEON; + } + } +#endif +#if defined(HAS_MERGEUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + MergeUVRow = MergeUVRow_SME; + } +#endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif + { + // Allocate a rows of uv. + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); + MergeUVRow(row_u, row_v, dst_uv, halfwidth); + ABGRToYRow(src_abgr, dst_y, width); + ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); + src_abgr += src_stride_abgr * 2; + dst_y += dst_stride_y * 2; + dst_uv += dst_stride_uv; + } + if (height & 1) { + ABGRToUVRow(src_abgr, 0, row_u, row_v, width); + MergeUVRow(row_u, row_v, dst_uv, halfwidth); + ABGRToYRow(src_abgr, dst_y, width); + } + free_aligned_buffer_64(row_u); + } + return 0; +} + +// Same as NV12 but U and V swapped. 
+LIBYUV_API +int ABGRToNV21(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_y, + int dst_stride_y, + uint8_t* dst_vu, + int dst_stride_vu, + int width, + int height) { + int y; + int halfwidth = (width + 1) >> 1; + void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_u, uint8_t* dst_v, int width) = + ABGRToUVRow_C; + void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = + ABGRToYRow_C; + void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, + uint8_t* dst_vu, int width) = MergeUVRow_C; + if (!src_abgr || !dst_y || !dst_vu || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYRow = ABGRToYRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYRow = ABGRToYRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVRow = ABGRToUVRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToUVRow = ABGRToUVRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToUVRow = ABGRToUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYRow = ABGRToYRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_NEON; + } + } +#endif +#if 
defined(HAS_ABGRTOYROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ABGRToYRow = ABGRToYRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVRow = ABGRToUVRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ABGRToUVRow = ABGRToUVRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ABGRToUVRow = ABGRToUVRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + ABGRToUVRow = ABGRToUVRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + ABGRToUVRow = ABGRToUVRow_SVE2; + } + } +#endif +#if defined(HAS_ABGRTOUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + ABGRToUVRow = ABGRToUVRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + ABGRToUVRow = ABGRToUVRow_SME; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYRow = ABGRToYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYRow = ABGRToYRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYRow = ABGRToYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYRow = ABGRToYRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYRow = ABGRToYRow_RVV; + } +#endif +#if defined(HAS_MERGEUVROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + MergeUVRow = MergeUVRow_Any_SSE2; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_SSE2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + MergeUVRow = MergeUVRow_Any_AVX2; + if (IS_ALIGNED(halfwidth, 32)) { + MergeUVRow = MergeUVRow_AVX2; + } + } +#endif +#if defined(HAS_MERGEUVROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + MergeUVRow = MergeUVRow_Any_AVX512BW; + if (IS_ALIGNED(halfwidth, 64)) { + MergeUVRow = 
MergeUVRow_AVX512BW; + } + } +#endif +#if defined(HAS_MERGEUVROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + MergeUVRow = MergeUVRow_Any_NEON; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_NEON; + } + } +#endif +#if defined(HAS_MERGEUVROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + MergeUVRow = MergeUVRow_SME; + } +#endif +#if defined(HAS_MERGEUVROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + MergeUVRow = MergeUVRow_Any_LSX; + if (IS_ALIGNED(halfwidth, 16)) { + MergeUVRow = MergeUVRow_LSX; + } + } +#endif +#if defined(HAS_MERGEUVROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + MergeUVRow = MergeUVRow_RVV; + } +#endif + { + // Allocate a rows of uv. + align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); + uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); + if (!row_u) + return 1; + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVRow(src_abgr, src_stride_abgr, row_u, row_v, width); + MergeUVRow(row_v, row_u, dst_vu, halfwidth); + ABGRToYRow(src_abgr, dst_y, width); + ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); + src_abgr += src_stride_abgr * 2; + dst_y += dst_stride_y * 2; + dst_vu += dst_stride_vu; + } + if (height & 1) { + ABGRToUVRow(src_abgr, 0, row_u, row_v, width); + MergeUVRow(row_v, row_u, dst_vu, halfwidth); + ABGRToYRow(src_abgr, dst_y, width); + } + free_aligned_buffer_64(row_u); + } + return 0; +} + +// Convert ARGB to YUY2. 
+LIBYUV_API +int ARGBToYUY2(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yuy2, + int dst_stride_yuy2, + int width, + int height) { + int y; + void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, + uint8_t* dst_u, uint8_t* dst_v, int width, + const struct ArgbConstants* c) = + ARGBToUVMatrixRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, + const uint8_t* src_v, uint8_t* dst_yuy2, int width) = + I422ToYUY2Row_C; + + if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; + dst_stride_yuy2 = -dst_stride_yuy2; + } + // Coalesce rows. + if (src_stride_argb == width * 4 && dst_stride_yuy2 == width * 2) { + width *= height; + height = 1; + src_stride_argb = dst_stride_yuy2 = 0; + } +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if 
(TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = 
ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } +#endif +#if defined(HAS_I422TOYUY2ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_SSE2; + } + } +#endif #if defined(HAS_I422TOYUY2ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; @@ -945,8 +1475,25 @@ int ARGBToYUY2Matrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOYUY2ROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToYUY2Row = I422ToYUY2Row_LSX; + } + } +#endif +#if defined(HAS_I422TOYUY2ROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToYUY2Row = I422ToYUY2Row_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToYUY2Row = I422ToYUY2Row_LASX; + } + } +#endif { + // Allocate a rows of yuv. 
align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; @@ -954,8 +1501,8 @@ int ARGBToYUY2Matrix(const uint8_t* src_argb, return 1; for (y = 0; y < height; ++y) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, constants); - ARGBToYMatrixRow(src_argb, row_y, width, constants); + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, &kArgbI601Constants); + ARGBToYMatrixRow(src_argb, row_y, width, &kArgbI601Constants); I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); src_argb += src_stride_argb; dst_yuy2 += dst_stride_yuy2; @@ -966,14 +1513,14 @@ int ARGBToYUY2Matrix(const uint8_t* src_argb, return 0; } +// Convert ARGB to UYVY. LIBYUV_API -int ARGBToUYVYMatrix(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_uyvy, - int dst_stride_uyvy, - const struct ArgbConstants* constants, - int width, - int height) { +int ARGBToUYVY(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_uyvy, + int dst_stride_uyvy, + int width, + int height) { int y; void (*ARGBToUVMatrixRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, @@ -985,15 +1532,29 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; - if (!src_argb || !dst_uyvy || !constants || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_uyvy || width <= 0 || height == 0) { return -1; } + // Negative height means invert the image. if (height < 0) { height = -height; - dst_uyvy = dst_uyvy + (ptrdiff_t)(height - 1) * dst_stride_uyvy; + dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } + // Coalesce rows. 
+ if (src_stride_argb == width * 4 && dst_stride_uyvy == width * 2) { + width *= height; + height = 1; + src_stride_argb = dst_stride_uyvy = 0; + } +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif #if defined(HAS_ARGBTOYMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; @@ -1018,6 +1579,43 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; @@ -1042,6 +1640,52 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; + 
} + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SVE2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + if (IS_ALIGNED(width, 2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SME; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; + } + } +#endif +#if defined(HAS_I422TOUYVYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_SSE2; + } + } +#endif #if defined(HAS_I422TOUYVYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; @@ -1058,8 +1702,25 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb, } } #endif +#if defined(HAS_I422TOUYVYROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + I422ToUYVYRow = I422ToUYVYRow_LSX; + } + } +#endif +#if defined(HAS_I422TOUYVYROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToUYVYRow = I422ToUYVYRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + I422ToUYVYRow = I422ToUYVYRow_LASX; + } + } +#endif { + // Allocate a rows of yuv. 
align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; @@ -1067,8 +1728,8 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb, return 1; for (y = 0; y < height; ++y) { - ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, constants); - ARGBToYMatrixRow(src_argb, row_y, width, constants); + ARGBToUVMatrixRow(src_argb, 0, row_u, row_v, width, &kArgbI601Constants); + ARGBToYMatrixRow(src_argb, row_y, width, &kArgbI601Constants); I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); src_argb += src_stride_argb; dst_uyvy += dst_stride_uyvy; @@ -1079,75 +1740,16 @@ int ARGBToUYVYMatrix(const uint8_t* src_argb, return 0; } -// Same as NV12 but U and V swapped. -LIBYUV_API -int ARGBToNV21(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { - return ARGBToNV21Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, - dst_vu, dst_stride_vu, &kArgbI601Constants, width, - height); -} - -LIBYUV_API -int ABGRToNV12(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_uv, - int dst_stride_uv, - int width, - int height) { - return ARGBToNV12Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, - dst_uv, dst_stride_uv, &kAbgrI601Constants, width, - height); -} - -// Same as NV12 but U and V swapped. -LIBYUV_API -int ABGRToNV21(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_vu, - int dst_stride_vu, - int width, - int height) { - return ARGBToNV21Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, - dst_vu, dst_stride_vu, &kAbgrI601Constants, width, - height); -} - -// Convert ARGB to YUY2. 
-LIBYUV_API -int ARGBToYUY2(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_yuy2, - int dst_stride_yuy2, - int width, - int height) { - return ARGBToYUY2Matrix(src_argb, src_stride_argb, dst_yuy2, dst_stride_yuy2, - &kArgbI601Constants, width, height); -} - -// Convert ARGB to UYVY. -LIBYUV_API -int ARGBToUYVY(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_uyvy, - int dst_stride_uyvy, - int width, - int height) { - return ARGBToUYVYMatrix(src_argb, src_stride_argb, dst_uyvy, dst_stride_uyvy, - &kArgbI601Constants, width, height); -} - // Convert ARGB to I400. +LIBYUV_API +int ARGBToI400Matrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + const struct ArgbConstants* argbconstants, + int width, + int height); + LIBYUV_API int ARGBToI400(const uint8_t* src_argb, int src_stride_argb, @@ -1158,6 +1760,100 @@ int ARGBToI400(const uint8_t* src_argb, return ARGBToI400Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, &kArgbI601Constants, width, height); } +LIBYUV_API +int ARGBToI400Matrix(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_y, + int dst_stride_y, + const struct ArgbConstants* argbconstants, + int width, + int height) { + int y; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; + if (!src_argb || !dst_y || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_argb = src_argb + (height - 1) * src_stride_argb; + src_stride_argb = -src_stride_argb; + } + // Coalesce rows. 
+ if (src_stride_argb == width * 4 && dst_stride_y == width) { + width *= height; + height = 1; + src_stride_argb = dst_stride_y = 0; + } +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ARGBToYMatrixRow(src_argb, dst_y, width, argbconstants); + src_argb += src_stride_argb; + dst_y += dst_stride_y; + } + return 0; +} #ifndef __riscv // Shuffle 
table for converting ARGB to RGBA. @@ -1187,18 +1883,16 @@ int ARGBToRGBA(const uint8_t* src_argb, int y; void (*ARGBToRGBARow)(const uint8_t* src_argb, uint8_t* dst_rgba, int width) = ARGBToRGBARow_C; - if (!src_argb || !dst_rgba || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_rgba || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_rgba == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_rgba = 0; @@ -1230,18 +1924,16 @@ int ARGBToRGB24(const uint8_t* src_argb, int y; void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRGB24Row_C; - if (!src_argb || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_rgb24 == width * 3 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_rgb24 == width * 3) { width *= height; height = 1; src_stride_argb = dst_stride_rgb24 = 0; @@ -1324,17 +2016,16 @@ int ARGBToRAW(const uint8_t* src_argb, int y; void (*ARGBToRAWRow)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRAWRow_C; - if (!src_argb || !dst_raw || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_argb || !dst_raw || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_raw == width * 3 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_raw == width * 3) { width *= height; height = 1; src_stride_argb = dst_stride_raw = 0; @@ -1416,19 +2107,25 @@ int ARGBToRGB565Dither(const uint8_t* src_argb, void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; - if (!src_argb || !dst_rgb565 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } if (!dither4x4) { dither4x4 = kDither565_4x4; } - +#if defined(HAS_ARGBTORGB565DITHERROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2; + } + } +#endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRGB565DitherRow = 
ARGBToRGB565DitherRow_Any_AVX2; @@ -1489,23 +2186,28 @@ int ARGBToRGB565(const uint8_t* src_argb, int y; void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRGB565Row_C; - if (!src_argb || !dst_rgb565 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_rgb565 == width * 2 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_rgb565 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_rgb565 = 0; } - +#if defined(HAS_ARGBTORGB565ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToRGB565Row = ARGBToRGB565Row_SSE2; + } + } +#endif #if defined(HAS_ARGBTORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2; @@ -1564,23 +2266,28 @@ int ARGBToARGB1555(const uint8_t* src_argb, int y; void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToARGB1555Row_C; - if (!src_argb || !dst_argb1555 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_argb1555 == width * 2 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb1555 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_argb1555 = 0; } - +#if defined(HAS_ARGBTOARGB1555ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; + } + } +#endif #if defined(HAS_ARGBTOARGB1555ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2; @@ -1633,23 +2340,28 @@ int ARGBToARGB4444(const uint8_t* src_argb, int y; void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToARGB4444Row_C; - if (!src_argb || !dst_argb4444 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_argb4444 == width * 2 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb4444 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_argb4444 = 0; } - +#if defined(HAS_ARGBTOARGB4444ROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; + if (IS_ALIGNED(width, 4)) { + ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; + } + } +#endif #if defined(HAS_ARGBTOARGB4444ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2; @@ -1702,18 +2414,16 @@ int ABGRToAR30(const uint8_t* src_abgr, int y; void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) = ABGRToAR30Row_C; - if (!src_abgr || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_abgr = src_abgr + (ptrdiff_t)(height - 1) * src_stride_abgr; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } // Coalesce rows. - if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) { width *= height; height = 1; src_stride_abgr = dst_stride_ar30 = 0; @@ -1761,18 +2471,16 @@ int ARGBToAR30(const uint8_t* src_argb, int y; void (*ARGBToAR30Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToAR30Row_C; - if (!src_argb || !dst_ar30 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_ar30 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_ar30 = 0; @@ -1809,68 +2517,10 @@ int ARGBToAR30(const uint8_t* src_argb, return 0; } -// ARGB little endian (bgra in memory) to J444 -LIBYUV_API -int ARGBToJ444(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - return ARGBToI444Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kArgbJPEGConstants, width, height); -} -// Convert ARGB to J420. (JPeg full range I420). -LIBYUV_API -int ARGBToJ420(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - return ARGBToI420Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kArgbJPEGConstants, width, height); -} // Convert ARGB to J422. (JPeg full range I422). -LIBYUV_API -int ARGBToJ422(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, - int width, - int height) { - return ARGBToI422Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kArgbJPEGConstants, width, height); -} -// Convert ARGB to J400. -LIBYUV_API -int ARGBToJ400(const uint8_t* src_argb, - int src_stride_argb, - uint8_t* dst_y, - int dst_stride_y, - int width, - int height) { - return ARGBToI400Matrix(src_argb, src_stride_argb, dst_y, dst_stride_y, - &kArgbJPEGConstants, width, height); -} // Convert RGBA to J400. 
LIBYUV_API @@ -1883,17 +2533,16 @@ int RGBAToJ400(const uint8_t* src_rgba, int y; void (*RGBAToYJRow)(const uint8_t* src_rgba, uint8_t* dst_yj, int width) = RGBAToYJRow_C; - if (!src_rgba || !dst_yj || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_rgba || !dst_yj || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_rgba = src_rgba + (ptrdiff_t)(height - 1) * src_stride_rgba; + src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } // Coalesce rows. - if (src_stride_rgba == width * 4 && dst_stride_yj == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_rgba == width * 4 && dst_stride_yj == width) { width *= height; height = 1; src_stride_rgba = dst_stride_yj = 0; @@ -1972,34 +2621,316 @@ int RGBAToJ400(const uint8_t* src_rgba, LIBYUV_API int ABGRToJ420(const uint8_t* src_abgr, int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { - return ARGBToI420Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kAbgrJPEGConstants, width, height); + int y; + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. 
+ if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } +#if defined(HAS_ABGRTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOYROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToYJRow = ABGRToYJRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToUVJRow = ABGRToUVJRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { 
+ ABGRToUVJRow = ABGRToUVJRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + ABGRToUVJRow = ABGRToUVJRow_SVE2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + ABGRToUVJRow = ABGRToUVJRow_SME; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif + + for (y = 0; y < height - 1; y += 2) { + ABGRToUVJRow(src_abgr, src_stride_abgr, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + ABGRToYJRow(src_abgr + src_stride_abgr, dst_yj + dst_stride_yj, width); + src_abgr += src_stride_abgr * 2; + dst_yj += dst_stride_yj * 2; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; + } + if (height & 1) { + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + } + return 0; } // Convert ABGR to J422. (JPeg full range I422). 
LIBYUV_API int ABGRToJ422(const uint8_t* src_abgr, int src_stride_abgr, - uint8_t* dst_y, - int dst_stride_y, - uint8_t* dst_u, - int dst_stride_u, - uint8_t* dst_v, - int dst_stride_v, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, int width, int height) { - return ARGBToI422Matrix(src_abgr, src_stride_abgr, dst_y, dst_stride_y, dst_u, - dst_stride_u, dst_v, dst_stride_v, - &kAbgrJPEGConstants, width, height); + int y; + void (*ABGRToUVJRow)(const uint8_t* src_abgr0, int src_stride_abgr, + uint8_t* dst_uj, uint8_t* dst_vj, int width) = + ABGRToUVJRow_C; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || !dst_uj || !dst_vj || width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } + // Coalesce rows. 
+ if (src_stride_abgr == width * 4 && dst_stride_yj == width && + dst_stride_uj * 2 == width && dst_stride_vj * 2 == width) { + width *= height; + height = 1; + src_stride_abgr = dst_stride_yj = dst_stride_uj = dst_stride_vj = 0; + } +#if defined(HAS_ABGRTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToUVJRow = ABGRToUVJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ABGRToUVJRow = ABGRToUVJRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ABGRToUVJRow = ABGRToUVJRow_AVX512BW; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = ABGRToUVJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_NEON_I8MM) + if (TestCpuFlag(kCpuHasNeonI8MM)) { + ABGRToUVJRow = ABGRToUVJRow_Any_NEON_I8MM; + if (IS_ALIGNED(width, 16)) { + ABGRToUVJRow = 
ABGRToUVJRow_NEON_I8MM; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SVE2; + if (IS_ALIGNED(width, 2)) { + ABGRToUVJRow = ABGRToUVJRow_SVE2; + } + } +#endif +#if defined(HAS_ABGRTOUVJROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + ABGRToUVJRow = ABGRToUVJRow_Any_SME; + if (IS_ALIGNED(width, 2)) { + ABGRToUVJRow = ABGRToUVJRow_SME; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ABGRToUVJRow(src_abgr, 0, dst_uj, dst_vj, width); + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; + dst_yj += dst_stride_yj; + dst_uj += dst_stride_uj; + dst_vj += dst_stride_vj; + } + return 0; } // Convert ABGR to J400. @@ -2010,8 +2941,83 @@ int ABGRToJ400(const uint8_t* src_abgr, int dst_stride_yj, int width, int height) { - return ARGBToI400Matrix(src_abgr, src_stride_abgr, dst_yj, dst_stride_yj, - &kAbgrJPEGConstants, width, height); + int y; + void (*ABGRToYJRow)(const uint8_t* src_abgr, uint8_t* dst_yj, int width) = + ABGRToYJRow_C; + if (!src_abgr || !dst_yj || width <= 0 || height == 0) { + return -1; + } + if (height < 0) { + height = -height; + src_abgr = src_abgr + (height - 1) * src_stride_abgr; + src_stride_abgr = -src_stride_abgr; + } + // Coalesce rows. 
+ if (src_stride_abgr == width * 4 && dst_stride_yj == width) { + width *= height; + height = 1; + src_stride_abgr = dst_stride_yj = 0; + } +#if defined(HAS_ABGRTOYJROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ABGRToYJRow = ABGRToYJRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_SSSE3; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ABGRToYJRow = ABGRToYJRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_AVX2; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ABGRToYJRow = ABGRToYJRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ABGRToYJRow = ABGRToYJRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ABGRToYJRow = ABGRToYJRow_LSX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ABGRToYJRow = ABGRToYJRow_Any_LASX; + if (IS_ALIGNED(width, 32)) { + ABGRToYJRow = ABGRToYJRow_LASX; + } + } +#endif +#if defined(HAS_ABGRTOYJROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + ABGRToYJRow = ABGRToYJRow_RVV; + } +#endif + + for (y = 0; y < height; ++y) { + ABGRToYJRow(src_abgr, dst_yj, width); + src_abgr += src_stride_abgr; + dst_yj += dst_stride_yj; + } + return 0; } // Convert ARGB to AR64. @@ -2025,19 +3031,17 @@ int ARGBToAR64(const uint8_t* src_argb, int y; void (*ARGBToAR64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, int width) = ARGBToAR64Row_C; - if (!src_argb || !dst_ar64 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_ar64 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_ar64 == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_ar64 = 0; @@ -2091,19 +3095,17 @@ int ARGBToAB64(const uint8_t* src_argb, int y; void (*ARGBToAB64Row)(const uint8_t* src_argb, uint16_t* dst_ar64, int width) = ARGBToAB64Row_C; - if (!src_argb || !dst_ab64 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_ab64 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_ab64 == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_ab64 = 0; @@ -2168,7 +3170,7 @@ int RAWToNV21Matrix(const uint8_t* src_raw, void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) = ARGBToYMatrixRow_C; void (*MergeUVRow)(const uint8_t* src_uj, const uint8_t* src_vj, - uint8_t* dst_vu, int width) = MergeUVRow_C; + uint8_t* dst_vu, int width) = MergeUVRow_C; #if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; @@ -2231,14 +3233,14 @@ int RAWToNV21Matrix(const uint8_t* src_raw, } #endif - if (!src_raw || !dst_y || !dst_vu || !argbconstants || width <= 0 || - height == 0 || height == INT_MIN) { + + if (!src_raw || !dst_y || !dst_vu || !argbconstants || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; + src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } @@ -2300,6 +3302,30 @@ int RAWToNV21Matrix(const uint8_t* src_raw, RAWToARGBRow = RAWToARGBRow_RVV; } #endif +#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; + if (IS_ALIGNED(width, 32)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; + } + } +#endif +#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; + if (IS_ALIGNED(width, 64)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + } + } +#endif #if defined(HAS_ARGBTOUVMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON; @@ -2309,7 +3335,7 @@ int RAWToNV21Matrix(const uint8_t* src_raw, } #endif #if defined(HAS_ARGBTOUVMATRIXROW_NEON_I8MM) - if (TestCpuFlag(kCpuHasNEON) && TestCpuFlag(kCpuHasNeonI8MM)) { + if (TestCpuFlag(kCpuHasNeonI8MM)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_NEON_I8MM; if (IS_ALIGNED(width, 16)) { ARGBToUVMatrixRow = ARGBToUVMatrixRow_NEON_I8MM; @@ -2330,27 +3356,19 @@ int RAWToNV21Matrix(const uint8_t* src_raw, } } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_SSSE3; +#if defined(HAS_ARGBTOUVMATRIXROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LSX; + if (IS_ALIGNED(width, 16)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LSX; } } #endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) - if 
(TestCpuFlag(kCpuHasAVX2)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX2; +#if defined(HAS_ARGBTOUVMATRIXROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - ARGBToUVMatrixRow = ARGBToUVMatrixRow_AVX512BW; + ARGBToUVMatrixRow = ARGBToUVMatrixRow_LASX; } } #endif @@ -2427,8 +3445,7 @@ int RAWToNV21Matrix(const uint8_t* src_raw, ARGBToUVMatrixRow(row, row_size, row_u, row_v, width, argbconstants); MergeUVRow(row_v, row_u, dst_vu, halfwidth); ARGBToYMatrixRow(row, dst_y, width, argbconstants); - ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, - argbconstants); + ARGBToYMatrixRow(row + row_size, dst_y + dst_stride_y, width, argbconstants); src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_vu += dst_stride_vu; @@ -2486,7 +3503,74 @@ int RGB24ToNV12(const uint8_t* src_rgb24, height); } + + +// Convert ARGB to J444. +LIBYUV_API +int ARGBToJ444(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height) { + return ARGBToI444Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, + dst_uj, dst_stride_uj, dst_vj, dst_stride_vj, + &kArgbJPEGConstants, width, height); +} + +// Convert ARGB to J422. +LIBYUV_API +int ARGBToJ422(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height) { + return ARGBToI422Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, + dst_uj, dst_stride_uj, dst_vj, dst_stride_vj, + &kArgbJPEGConstants, width, height); +} + +// Convert ARGB to J420. 
(JPeg full range I420). +LIBYUV_API +int ARGBToJ420(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + uint8_t* dst_uj, + int dst_stride_uj, + uint8_t* dst_vj, + int dst_stride_vj, + int width, + int height) { + return ARGBToI420Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, + dst_uj, dst_stride_uj, dst_vj, dst_stride_vj, + &kArgbJPEGConstants, width, height); +} + +// Convert ARGB to J400. +LIBYUV_API +int ARGBToJ400(const uint8_t* src_argb, + int src_stride_argb, + uint8_t* dst_yj, + int dst_stride_yj, + int width, + int height) { + return ARGBToI400Matrix(src_argb, src_stride_argb, dst_yj, dst_stride_yj, + &kArgbJPEGConstants, width, height); +} + #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif + + diff --git a/source/convert_to_argb.cc b/source/convert_to_argb.cc index 720cb0984..72d21b042 100644 --- a/source/convert_to_argb.cc +++ b/source/convert_to_argb.cc @@ -11,7 +11,6 @@ #include "libyuv/convert_argb.h" #include -#include #include #include @@ -51,26 +50,12 @@ int ConvertToARGB(const uint8_t* sample, int crop_height, enum RotationMode rotation, uint32_t fourcc) { - if (src_height == INT_MIN || crop_height == INT_MIN) { - return -1; - } - - int abs_src_height = (src_height < 0) ? -src_height : src_height; - int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; - - if (dst_argb == NULL || sample == NULL || src_width <= 0 || - src_width > INT_MAX / 4 || crop_width <= 0 || crop_width > INT_MAX / 4 || - src_height == 0 || crop_height == 0 || crop_x < 0 || crop_y < 0 || - crop_width > src_width || crop_x > src_width - crop_width || - abs_crop_height > abs_src_height || - crop_y > abs_src_height - abs_crop_height) { - return -1; - } - uint32_t format = CanonicalFourCC(fourcc); int aligned_src_width = (src_width + 1) & ~1; const uint8_t* src; const uint8_t* src_uv; + int abs_src_height = (src_height < 0) ? -src_height : src_height; + int inv_crop_height = (crop_height < 0) ? 
-crop_height : crop_height; int r = 0; // One pass rotation is available for some formats. For the rest, convert @@ -83,8 +68,13 @@ int ConvertToARGB(const uint8_t* sample, uint8_t* dest_argb = dst_argb; int dest_dst_stride_argb = dst_stride_argb; uint8_t* rotate_buffer = NULL; - int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; + int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; + if (dst_argb == NULL || sample == NULL || src_width <= 0 || + src_width > INT_MAX / 4 || crop_width <= 0 || crop_width > INT_MAX / 4 || + src_height == 0 || crop_height == 0) { + return -1; + } if (src_height < 0) { inv_crop_height = -inv_crop_height; } @@ -106,97 +96,95 @@ int ConvertToARGB(const uint8_t* sample, switch (format) { // Single plane formats case FOURCC_YUY2: - src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2; + src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_UYVY: - src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2; + src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_24BG: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3; + src = sample + (src_width * crop_y + crop_x) * 3; r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RAW: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3; + src = sample + (src_width * crop_y + crop_x) * 3; r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_ARGB: if (!need_buf && !rotation) { - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, 
inv_crop_height); } break; case FOURCC_BGRA: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_ABGR: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBA: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_AR30: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_AB30: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBP: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2; + src = sample + (src_width * crop_y + crop_x) * 2; r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBO: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2; + src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_R444: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2; + src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_I400: - src = sample + (ptrdiff_t)src_width * crop_y + crop_x; + src = sample + src_width * 
crop_y + crop_x; r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_J400: - src = sample + (ptrdiff_t)src_width * crop_y + crop_x; + src = sample + src_width * crop_y + crop_x; r = J400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; // Biplanar formats case FOURCC_NV12: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - src_uv = sample + - aligned_src_width * ((ptrdiff_t)abs_src_height + crop_y / 2) + - crop_x; + src = sample + (src_width * crop_y + crop_x); + src_uv = + sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_NV21: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - src_uv = sample + - aligned_src_width * ((ptrdiff_t)abs_src_height + crop_y / 2) + - crop_x; + src = sample + (src_width * crop_y + crop_x); + src_uv = + sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; // Call NV12 but with u and v parameters swapped. 
r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); @@ -204,21 +192,21 @@ int ConvertToARGB(const uint8_t* sample, // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { - const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x); + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - ((ptrdiff_t)halfwidth * crop_y + crop_x) / 2; - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + crop_x / 2; + src_v = sample + src_width * abs_src_height + + (halfwidth * crop_y + crop_x) / 2; + src_u = sample + src_width * abs_src_height + + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } else { - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - ((ptrdiff_t)halfwidth * crop_y + crop_x) / 2; - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + crop_x / 2; + src_u = sample + src_width * abs_src_height + + (halfwidth * crop_y + crop_x) / 2; + src_v = sample + src_width * abs_src_height + + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); @@ -228,12 +216,11 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_J420: { int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; - const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height + - ((ptrdiff_t)halfwidth * crop_y + crop_x) / 2; - const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + - 
crop_x / 2; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u = sample + src_width * abs_src_height + + (halfwidth * crop_y + crop_x) / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -242,12 +229,11 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_H420: { int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; - const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height + - ((ptrdiff_t)halfwidth * crop_y + crop_x) / 2; - const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + - crop_x / 2; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u = sample + src_width * abs_src_height + + (halfwidth * crop_y + crop_x) / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; r = H420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -256,12 +242,11 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_U420: { int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; - const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height + - ((ptrdiff_t)halfwidth * crop_y + crop_x) / 2; - const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + - crop_x / 2; + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); + const uint8_t* src_u = sample + src_width * abs_src_height + + (halfwidth * crop_y + 
crop_x) / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; r = U420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -270,19 +255,19 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_I422: case FOURCC_YV16: { int halfwidth = (src_width + 1) / 2; - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; if (format == FOURCC_YV16) { - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + crop_x / 2; - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + crop_x / 2; + src_v = sample + src_width * abs_src_height + halfwidth * crop_y + + crop_x / 2; + src_u = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } else { - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + crop_x / 2; - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + crop_x / 2; + src_u = sample + src_width * abs_src_height + halfwidth * crop_y + + crop_x / 2; + src_v = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); @@ -291,12 +276,11 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_J422: { int halfwidth = (src_width + 1) / 2; - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; - const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + crop_x / 2; - const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * 
((ptrdiff_t)abs_src_height + crop_y) + - crop_x / 2; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u = + sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; r = J422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -304,12 +288,11 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_H422: { int halfwidth = (src_width + 1) / 2; - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; - const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + crop_x / 2; - const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + - crop_x / 2; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u = + sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; + const uint8_t* src_v = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -317,12 +300,11 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_U422: { int halfwidth = (src_width + 1) / 2; - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; - const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + crop_x / 2; - const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + - crop_x / 2; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u = + sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; + const uint8_t* src_v = sample + src_width * 
abs_src_height + + halfwidth * (abs_src_height + crop_y) + crop_x / 2; r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -330,19 +312,15 @@ int ConvertToARGB(const uint8_t* sample, case FOURCC_I444: case FOURCC_YV24: { - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; if (format == FOURCC_YV24) { - src_v = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - src_u = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } else { - src_u = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - src_v = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); @@ -350,36 +328,33 @@ int ConvertToARGB(const uint8_t* sample, } case FOURCC_J444: { - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; - const uint8_t* src_u = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - const uint8_t* src_v = - sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; r = J444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case 
FOURCC_H444: { - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; - const uint8_t* src_u = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - const uint8_t* src_v = - sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; r = H444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_U444: { - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; - const uint8_t* src_u = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - const uint8_t* src_v = - sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; + const uint8_t* src_u; + const uint8_t* src_v; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; r = U444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; @@ -402,7 +377,7 @@ int ConvertToARGB(const uint8_t* sample, } free(rotate_buffer); } else if (rotation) { - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height, rotation); } diff --git a/source/convert_to_i420.cc b/source/convert_to_i420.cc index baa4a9494..aab071e1a 100644 --- a/source/convert_to_i420.cc +++ b/source/convert_to_i420.cc @@ -44,24 +44,12 @@ int ConvertToI420(const uint8_t* sample, int crop_height, enum RotationMode rotation, uint32_t fourcc) { - if (src_height == INT_MIN || crop_height 
== INT_MIN) { - return -1; - } - - const int abs_src_height = (src_height < 0) ? -src_height : src_height; - const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; - - if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 || - src_width > INT_MAX / 4 || crop_width <= 0 || src_height == 0 || - crop_height == 0 || crop_x < 0 || crop_y < 0 || crop_width > src_width || - crop_x > src_width - crop_width || abs_crop_height > abs_src_height || - crop_y > abs_src_height - abs_crop_height) { - return -1; - } - uint32_t format = CanonicalFourCC(fourcc); + int aligned_src_width = (src_width + 1) & ~1; const uint8_t* src; const uint8_t* src_uv; + const int abs_src_height = (src_height < 0) ? -src_height : src_height; + const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; int r = 0; LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 && format != FOURCC_NV12 && @@ -76,7 +64,12 @@ int ConvertToI420(const uint8_t* sample, uint8_t* rotate_buffer = NULL; const int inv_crop_height = (src_height < 0) ? -abs_crop_height : abs_crop_height; - int aligned_src_width = (src_width + 1) & ~1; + + if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 || + src_width > INT_MAX / 4 || crop_width <= 0 || src_height == 0 || + crop_height == 0) { + return -1; + } // One pass rotation is available for some formats. For the rest, convert // to I420 (with optional vertical flipping) into a temporary I420 buffer, @@ -84,14 +77,14 @@ int ConvertToI420(const uint8_t* sample, // For in-place conversion, if destination dst_y is same as source sample, // also enable temporary buffer. 
if (need_buf) { - size_t y_size = (size_t)crop_width * abs_crop_height; - size_t uv_size = - (size_t)((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); - if (uv_size > SIZE_MAX / 2 || y_size > SIZE_MAX - uv_size * 2) { + int y_size = crop_width * abs_crop_height; + int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); + const uint64_t rotate_buffer_size = + (uint64_t)y_size + (uint64_t)uv_size * 2; + if (rotate_buffer_size > SIZE_MAX) { return -1; // Invalid size. } - const size_t rotate_buffer_size = y_size + uv_size * 2; - rotate_buffer = (uint8_t*)malloc(rotate_buffer_size); + rotate_buffer = (uint8_t*)malloc((size_t)rotate_buffer_size); if (!rotate_buffer) { return 1; // Out of memory runtime error. } @@ -109,7 +102,7 @@ int ConvertToI420(const uint8_t* sample, uint8_t* v = (crop_x & 1) ? dst_u : dst_v; int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u; int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v; - src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2; + src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u, stride_u, v, stride_v, crop_width, inv_crop_height); break; @@ -119,86 +112,84 @@ int ConvertToI420(const uint8_t* sample, uint8_t* v = (crop_x & 1) ? dst_u : dst_v; int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u; int stride_v = (crop_x & 1) ? 
dst_stride_u : dst_stride_v; - src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2; + src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u, stride_u, v, stride_v, crop_width, inv_crop_height); break; } case FOURCC_RGBP: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2; + src = sample + (src_width * crop_y + crop_x) * 2; r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RGBO: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2; + src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_R444: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2; + src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_24BG: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3; + src = sample + (src_width * crop_y + crop_x) * 3; r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RAW: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3; + src = sample + (src_width * crop_y + crop_x) * 3; r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_ARGB: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_BGRA: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = 
sample + (src_width * crop_y + crop_x) * 4; r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_ABGR: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RGBA: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4; + src = sample + (src_width * crop_y + crop_x) * 4; r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; // TODO(fbarchard): Add AR30 and AB30 case FOURCC_I400: - src = sample + (ptrdiff_t)src_width * crop_y + crop_x; + src = sample + src_width * crop_y + crop_x; r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Biplanar formats case FOURCC_NV12: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - src_uv = sample + ((ptrdiff_t)src_width * abs_src_height) + - ((ptrdiff_t)(crop_y / 2) * aligned_src_width) + - ((crop_x / 2) * 2); + src = sample + (src_width * crop_y + crop_x); + src_uv = sample + (src_width * abs_src_height) + + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height, rotation); break; case FOURCC_NV21: - src = sample + ((ptrdiff_t)src_width * crop_y + crop_x); - src_uv = sample + ((ptrdiff_t)src_width * abs_src_height) + - ((ptrdiff_t)(crop_y / 2) * aligned_src_width) + - ((crop_x / 2) * 2); + src = sample + (src_width * crop_y + crop_x); + src_uv = sample + (src_width * abs_src_height) + + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); // Call NV12 but with dst_u and dst_v parameters swapped. 
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, dst_stride_y, dst_v, dst_stride_v, dst_u, @@ -207,23 +198,21 @@ int ConvertToI420(const uint8_t* sample, // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { - const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x); + const uint8_t* src_y = sample + (src_width * crop_y + crop_x); const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * (crop_y / 2) + (crop_x / 2); - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + (crop_y / 2)) + + src_v = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) + (crop_x / 2); + src_u = sample + src_width * abs_src_height + + halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2); } else { - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * (crop_y / 2) + (crop_x / 2); - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)halfheight + (crop_y / 2)) + + src_u = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) + (crop_x / 2); + src_v = sample + src_width * abs_src_height + + halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2); } r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, @@ -232,20 +221,20 @@ int ConvertToI420(const uint8_t* sample, } case FOURCC_I422: case FOURCC_YV16: { - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + (crop_x / 2); - src_u = sample + (ptrdiff_t)src_width 
* abs_src_height + - halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + (crop_x / 2); + src_v = sample + src_width * abs_src_height + halfwidth * crop_y + + (crop_x / 2); + src_u = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + (crop_x / 2); } else { - src_u = sample + (ptrdiff_t)src_width * abs_src_height + - (ptrdiff_t)halfwidth * crop_y + (crop_x / 2); - src_v = sample + (ptrdiff_t)src_width * abs_src_height + - halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + (crop_x / 2); + src_u = sample + src_width * abs_src_height + halfwidth * crop_y + + (crop_x / 2); + src_v = sample + src_width * abs_src_height + + halfwidth * (abs_src_height + crop_y) + (crop_x / 2); } r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, @@ -254,19 +243,15 @@ int ConvertToI420(const uint8_t* sample, } case FOURCC_I444: case FOURCC_YV24: { - const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x; + const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; if (format == FOURCC_YV24) { - src_v = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - src_u = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } else { - src_u = - sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x; - src_v = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) + - crop_x; + src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; + src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, diff --git a/source/cpu_id.cc b/source/cpu_id.cc index 0d7ea9a95..0cc46b10a 100644 --- a/source/cpu_id.cc +++ 
b/source/cpu_id.cc @@ -397,6 +397,7 @@ static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info7[4] = {0, 0, 0, 0}; int cpu_einfo7[4] = {0, 0, 0, 0}; int cpu_info24[4] = {0, 0, 0, 0}; + int cpu_info21[4] = {0, 0, 0, 0}; int cpu_amdinfo21[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); @@ -405,6 +406,9 @@ static SAFEBUFFERS int GetCpuFlags(void) { CpuId(7, 1, cpu_einfo7); CpuId(0x80000021, 0, cpu_amdinfo21); } + if (cpu_info0[0] >= 0x21) { + CpuId(0x21, 0, cpu_info21); + } if (cpu_info0[0] >= 0x24) { CpuId(0x24, 0, cpu_info24); } @@ -435,7 +439,8 @@ static SAFEBUFFERS int GetCpuFlags(void) { ((cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0) | ((cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0) | ((cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0) | - ((cpu_info7[3] & 0x02000000) ? kCpuHasAMXINT8 : 0); + ((cpu_info7[3] & 0x02000000) ? kCpuHasAMXINT8 : 0) | + ((cpu_info21[0] & 0x00800000) ? kCpuHasAVX512BMM : 0); if (cpu_info0[0] >= 0x24 && (cpu_einfo7[3] & 0x00080000)) { cpu_info |= ((cpu_info24[1] & 0xFF) >= 2) ? kCpuHasAVX10_2 : 0; } diff --git a/source/planar_functions.cc b/source/planar_functions.cc index 3481d643d..3b703920c 100644 --- a/source/planar_functions.cc +++ b/source/planar_functions.cc @@ -11,16 +11,16 @@ #include "libyuv/planar_functions.h" #include -#include #include // for memset() -#include "libyuv/convert_from_argb.h" // For ArgbConstants #include "libyuv/cpu_id.h" #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" #include "libyuv/scale_row.h" // for ScaleRowDown2 #ifdef __cplusplus namespace libyuv { + extern "C" { #endif @@ -34,18 +34,17 @@ void CopyPlane(const uint8_t* src_y, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -123,18 +122,17 @@ void Convert16To8Plane(const uint16_t* src_y, void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -198,18 +196,17 @@ void Convert8To16Plane(const uint8_t* src_y, void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. 
- if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -266,18 +263,17 @@ void Convert8To8Plane(const uint8_t* src_y, void (*Convert8To8Row)(const uint8_t* src_y, uint8_t* dst_y, int scale, int bias, int width) = Convert8To8Row_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -336,16 +332,16 @@ int I422Copy(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -376,15 +372,15 @@ int I444Copy(const uint8_t* src_y, int width, int height) { if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -417,16 +413,16 @@ int I210Copy(const uint16_t* src_y, int halfwidth = (width + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -458,15 +454,15 @@ int I410Copy(const uint16_t* src_y, int width, int height) { if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -488,13 +484,13 @@ int I400ToI400(const uint8_t* src_y, int dst_stride_y, int width, int height) { - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); @@ -517,13 +513,13 @@ int I420ToI400(const uint8_t* src_y, (void)src_stride_u; (void)src_v; (void)src_stride_v; - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } @@ -546,8 +542,7 @@ int NV12Copy(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -555,8 +550,8 @@ int NV12Copy(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_uv = src_uv + (ptrdiff_t)(halfheight - 1) * src_stride_uv; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; src_stride_y = -src_stride_y; src_stride_uv = -src_stride_uv; } @@ -596,20 +591,20 @@ void SplitUVPlane(const uint8_t* src_uv, int y; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_u = dst_u + (ptrdiff_t)(height - 1) * dst_stride_u; - dst_v = dst_v + (ptrdiff_t)(height - 1) * dst_stride_v; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; dst_stride_u = -dst_stride_u; dst_stride_v = -dst_stride_v; } // Coalesce rows. 
if (src_stride_uv == width * 2 && dst_stride_u == width && - dst_stride_v == width && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_v == width) { width *= height; height = 1; src_stride_uv = dst_stride_u = dst_stride_v = 0; @@ -630,14 +625,6 @@ void SplitUVPlane(const uint8_t* src_uv, } } #endif -#if defined(HAS_SPLITUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - SplitUVRow = SplitUVRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - SplitUVRow = SplitUVRow_AVX512BW; - } - } -#endif #if defined(HAS_SPLITUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SplitUVRow = SplitUVRow_Any_NEON; @@ -681,18 +668,18 @@ void MergeUVPlane(const uint8_t* src_u, int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_uv = dst_uv + (ptrdiff_t)(height - 1) * dst_stride_uv; + dst_uv = dst_uv + (height - 1) * dst_stride_uv; dst_stride_uv = -dst_stride_uv; } // Coalesce rows. if (src_stride_u == width && src_stride_v == width && - dst_stride_uv == width * 2 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_uv == width * 2) { width *= height; height = 1; src_stride_u = src_stride_v = dst_stride_uv = 0; @@ -773,20 +760,20 @@ void SplitUVPlane_16(const uint16_t* src_uv, void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u, uint16_t* dst_v, int depth, int width) = SplitUVRow_16_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_u = dst_u + (ptrdiff_t)(height - 1) * dst_stride_u; - dst_v = dst_v + (ptrdiff_t)(height - 1) * dst_stride_v; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; dst_stride_u = -dst_stride_u; dst_stride_v = -dst_stride_v; } // Coalesce rows. if (src_stride_uv == width * 2 && dst_stride_u == width && - dst_stride_v == width && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_v == width) { width *= height; height = 1; src_stride_uv = dst_stride_u = dst_stride_v = 0; @@ -833,18 +820,18 @@ void MergeUVPlane_16(const uint16_t* src_u, MergeUVRow_16_C; assert(depth >= 8); assert(depth <= 16); - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_uv = dst_uv + (ptrdiff_t)(height - 1) * dst_stride_uv; + dst_uv = dst_uv + (height - 1) * dst_stride_uv; dst_stride_uv = -dst_stride_uv; } // Coalesce rows. if (src_stride_u == width && src_stride_v == width && - dst_stride_uv == width * 2 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_uv == width * 2) { width *= height; height = 1; src_stride_u = src_stride_v = dst_stride_uv = 0; @@ -893,18 +880,17 @@ void ConvertToMSBPlane_16(const uint16_t* src_y, int scale = 1 << (16 - depth); void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = MultiplyRow_16_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. 
- if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -952,18 +938,17 @@ void ConvertToLSBPlane_16(const uint16_t* src_y, int scale = 1 << depth; void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) = DivideRow_16_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -1009,18 +994,17 @@ void SwapUVPlane(const uint8_t* src_uv, int y; void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) = SwapUVRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (ptrdiff_t)(height - 1) * src_stride_uv; + src_uv = src_uv + (height - 1) * src_stride_uv; src_stride_uv = -src_stride_uv; } // Coalesce rows. 
- if (src_stride_uv == width * 2 && dst_stride_vu == width * 2 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) { width *= height; height = 1; src_stride_uv = dst_stride_vu = 0; @@ -1073,7 +1057,7 @@ int NV21ToNV12(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_vu || !dst_uv || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_vu || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -1085,7 +1069,7 @@ int NV21ToNV12(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_vu = src_vu + (ptrdiff_t)(halfheight - 1) * src_stride_vu; + src_vu = src_vu + (halfheight - 1) * src_stride_vu; src_stride_vu = -src_stride_vu; } @@ -1095,7 +1079,7 @@ int NV21ToNV12(const uint8_t* src_y, } // Test if tile_height is a power of 2 (16 or 32) -#define IS_POWEROFTWO(x) (!((x) & ((x) - 1))) +#define IS_POWEROFTWO(x) (!((x) & ((x)-1))) // Detile a plane of data // tile width is 16 and assumed. @@ -1114,7 +1098,7 @@ int DetilePlane(const uint8_t* src_y, int y; void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst, int width) = DetileRow_C; - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN || + if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) { return -1; } @@ -1122,7 +1106,7 @@ int DetilePlane(const uint8_t* src_y, // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } @@ -1171,7 +1155,7 @@ int DetilePlane_16(const uint16_t* src_y, int y; void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride, uint16_t* dst, int width) = DetileRow_16_C; - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN || + if (!src_y || !dst_y || width <= 0 || height == 0 || !IS_POWEROFTWO(tile_height)) { return -1; } @@ -1179,7 +1163,7 @@ int DetilePlane_16(const uint16_t* src_y, // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } @@ -1240,15 +1224,15 @@ void DetileSplitUVPlane(const uint8_t* src_uv, assert(tile_height > 0); assert(src_stride_uv > 0); - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_u = dst_u + (ptrdiff_t)(height - 1) * dst_stride_u; + dst_u = dst_u + (height - 1) * dst_stride_u; dst_stride_u = -dst_stride_u; - dst_v = dst_v + (ptrdiff_t)(height - 1) * dst_stride_v; + dst_v = dst_v + (height - 1) * dst_stride_v; dst_stride_v = -dst_stride_v; } @@ -1304,13 +1288,13 @@ void DetileToYUY2(const uint8_t* src_y, assert(src_stride_uv > 0); assert(tile_height > 0); - if (width <= 0 || height == 0 || height == INT_MIN || tile_height <= 0) { + if (width <= 0 || height == 0 || tile_height <= 0) { return; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_yuy2 = dst_yuy2 + (ptrdiff_t)(height - 1) * dst_stride_yuy2; + dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } @@ -1366,23 +1350,22 @@ void SplitRGBPlane(const uint8_t* src_rgb, int y; void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitRGBRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_r = dst_r + (ptrdiff_t)(height - 1) * dst_stride_r; - dst_g = dst_g + (ptrdiff_t)(height - 1) * dst_stride_g; - dst_b = dst_b + (ptrdiff_t)(height - 1) * dst_stride_b; + dst_r = dst_r + (height - 1) * dst_stride_r; + dst_g = dst_g + (height - 1) * dst_stride_g; + dst_b = dst_b + (height - 1) * dst_stride_b; dst_stride_r = -dst_stride_r; dst_stride_g = -dst_stride_g; dst_stride_b = -dst_stride_b; } // Coalesce rows. if (src_stride_rgb == width * 3 && dst_stride_r == width && - dst_stride_g == width && dst_stride_b == width && - (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_g == width && dst_stride_b == width) { width *= height; height = 1; src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0; @@ -1450,19 +1433,19 @@ void MergeRGBPlane(const uint8_t* src_r, void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } // Coalesce rows. // Negative height means invert the image. if (height < 0) { height = -height; - dst_rgb = dst_rgb + (ptrdiff_t)(height - 1) * dst_stride_rgb; + dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; dst_stride_rgb = -dst_stride_rgb; } // Coalesce rows. 
if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - dst_stride_rgb == width * 3 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_rgb == width * 3) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0; @@ -1517,14 +1500,13 @@ static void SplitARGBPlaneAlpha(const uint8_t* src_argb, uint8_t* dst_b, uint8_t* dst_a, int width) = SplitARGBRow_C; - assert(height >= 0); + assert(height > 0); if (width <= 0 || height == 0) { return; } if (src_stride_argb == width * 4 && dst_stride_r == width && - dst_stride_g == width && dst_stride_b == width && dst_stride_a == width && - (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) { width *= height; height = 1; src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = @@ -1593,14 +1575,13 @@ static void SplitARGBPlaneOpaque(const uint8_t* src_argb, int y; void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitXRGBRow_C; - assert(height >= 0); + assert(height > 0); if (width <= 0 || height == 0) { return; } if (src_stride_argb == width * 4 && dst_stride_r == width && - dst_stride_g == width && dst_stride_b == width && - (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_g == width && dst_stride_b == width) { width *= height; height = 1; src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0; @@ -1666,16 +1647,13 @@ void SplitARGBPlane(const uint8_t* src_argb, int dst_stride_a, int width, int height) { - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_r = dst_r + (ptrdiff_t)(height - 1) * dst_stride_r; - dst_g = dst_g + (ptrdiff_t)(height - 1) * dst_stride_g; - dst_b = dst_b + (ptrdiff_t)(height - 1) * dst_stride_b; - dst_a = dst_a + (ptrdiff_t)(height - 1) * dst_stride_a; + dst_r = dst_r + (height - 1) * dst_stride_r; + dst_g = dst_g + (height - 1) * dst_stride_g; + dst_b = dst_b + (height - 1) * dst_stride_b; + dst_a = dst_a + (height - 1) * dst_stride_a; dst_stride_r = -dst_stride_r; dst_stride_g = -dst_stride_g; dst_stride_b = -dst_stride_b; @@ -1710,14 +1688,13 @@ static void MergeARGBPlaneAlpha(const uint8_t* src_r, const uint8_t* src_b, const uint8_t* src_a, uint8_t* dst_argb, int width) = MergeARGBRow_C; - assert(height >= 0); + assert(height > 0); if (width <= 0 || height == 0) { return; } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - src_stride_a == width && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + src_stride_a == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = src_stride_a = @@ -1779,13 +1756,13 @@ static void MergeARGBPlaneOpaque(const uint8_t* src_r, const uint8_t* src_b, uint8_t* dst_argb, int width) = MergeXRGBRow_C; - assert(height >= 0); + assert(height > 0); if (width <= 0 || height == 0) { return; } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0; @@ -1842,13 +1819,10 @@ void MergeARGBPlane(const uint8_t* src_r, int dst_stride_argb, int width, int height) { - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } @@ -1881,18 +1855,15 @@ void MergeXR30Plane(const uint16_t* src_r, const uint16_t* src_b, uint8_t* dst_ar30, int depth, int width) = MergeXR30Row_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar30 = dst_ar30 + (ptrdiff_t)(height - 1) * dst_stride_ar30; + dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } // Coalesce rows. if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - dst_stride_ar30 == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_ar30 == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = dst_stride_ar30 = 0; @@ -1950,14 +1921,8 @@ static void MergeAR64PlaneAlpha(const uint16_t* src_r, uint16_t* dst_argb, int depth, int width) = MergeAR64Row_C; - assert(height >= 0); - - if (width <= 0 || height == 0) { - return; - } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - src_stride_a == width && dst_stride_ar64 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + src_stride_a == width && dst_stride_ar64 == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = src_stride_a = @@ -2007,14 +1972,9 @@ static void MergeAR64PlaneOpaque(const uint16_t* src_r, const uint16_t* src_b, uint16_t* dst_argb, int depth, int width) = MergeXR64Row_C; - assert(height >= 0); - - if (width <= 0 || height == 0) { - return; - } // Coalesce rows. 
if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - dst_stride_ar64 == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_ar64 == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0; @@ -2059,13 +2019,10 @@ void MergeAR64Plane(const uint16_t* src_r, int width, int height, int depth) { - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; - dst_ar64 = dst_ar64 + (ptrdiff_t)(height - 1) * dst_stride_ar64; + dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64; dst_stride_ar64 = -dst_stride_ar64; } @@ -2100,14 +2057,8 @@ static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r, uint8_t* dst_argb, int depth, int width) = MergeARGB16To8Row_C; - assert(height >= 0); - - if (width <= 0 || height == 0) { - return; - } if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - src_stride_a == width && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + src_stride_a == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = src_stride_a = @@ -2157,14 +2108,9 @@ static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r, const uint16_t* src_b, uint8_t* dst_argb, int depth, int width) = MergeXRGB16To8Row_C; - assert(height >= 0); - - if (width <= 0 || height == 0) { - return; - } // Coalesce rows. 
if (src_stride_r == width && src_stride_g == width && src_stride_b == width && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0; @@ -2209,13 +2155,10 @@ void MergeARGB16To8Plane(const uint16_t* src_r, int width, int height, int depth) { - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } @@ -2247,20 +2190,19 @@ int YUY2ToI422(const uint8_t* src_yuy2, uint8_t* dst_v, int width) = YUY2ToUV422Row_C; void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; - if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_yuy2 = src_yuy2 + (ptrdiff_t)(height - 1) * src_stride_yuy2; + src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. 
if (src_stride_yuy2 == width * 2 && dst_stride_y == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width && - (ptrdiff_t)width * height <= 32768) { + width * height <= 32768) { width *= height; height = 1; src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0; @@ -2344,20 +2286,19 @@ int UYVYToI422(const uint8_t* src_uyvy, uint8_t* dst_v, int width) = UYVYToUV422Row_C; void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; - if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uyvy = src_uyvy + (ptrdiff_t)(height - 1) * src_stride_uyvy; + src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } // Coalesce rows. if (src_stride_uyvy == width * 2 && dst_stride_y == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width && - (ptrdiff_t)width * height <= 32768) { + width * height <= 32768) { width *= height; height = 1; src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0; @@ -2435,18 +2376,17 @@ int YUY2ToY(const uint8_t* src_yuy2, int y; void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; - if (!src_yuy2 || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_yuy2 || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_yuy2 = src_yuy2 + (ptrdiff_t)(height - 1) * src_stride_yuy2; + src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. 
- if (src_stride_yuy2 == width * 2 && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_yuy2 == width * 2 && dst_stride_y == width) { width *= height; height = 1; src_stride_yuy2 = dst_stride_y = 0; @@ -2495,18 +2435,17 @@ int UYVYToY(const uint8_t* src_uyvy, int y; void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; - if (!src_uyvy || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_uyvy || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uyvy = src_uyvy + (ptrdiff_t)(height - 1) * src_stride_uyvy; + src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } // Coalesce rows. - if (src_stride_uyvy == width * 2 && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_uyvy == width * 2 && dst_stride_y == width) { width *= height; height = 1; src_stride_uyvy = dst_stride_y = 0; @@ -2563,13 +2502,10 @@ void MirrorPlane(const uint8_t* src_y, int height) { int y; void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } #if defined(HAS_MIRRORROW_NEON) @@ -2596,14 +2532,6 @@ void MirrorPlane(const uint8_t* src_y, } } #endif -#if defined(HAS_MIRRORROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - MirrorRow = MirrorRow_Any_AVX512BW; - if (IS_ALIGNED(width, 64)) { - MirrorRow = MirrorRow_AVX512BW; - } - } -#endif #if defined(HAS_MIRRORROW_LSX) if (TestCpuFlag(kCpuHasLSX)) { MirrorRow = MirrorRow_Any_LSX; @@ -2640,13 +2568,10 @@ void MirrorUVPlane(const uint8_t* src_uv, int y; void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorUVRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (ptrdiff_t)(height - 1) * src_stride_uv; + src_uv = src_uv + (height - 1) * src_stride_uv; src_stride_uv = -src_stride_uv; } #if defined(HAS_MIRRORUVROW_NEON) @@ -2659,6 +2584,7 @@ void MirrorUVPlane(const uint8_t* src_uv, #endif #if defined(HAS_MIRRORUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { + MirrorUVRow = MirrorUVRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { MirrorUVRow = MirrorUVRow_SSSE3; } @@ -2705,13 +2631,13 @@ int I400Mirror(const uint8_t* src_y, int dst_stride_y, int width, int height) { - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } @@ -2739,7 +2665,7 @@ int I420Mirror(const uint8_t* src_y, int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } @@ -2747,9 +2673,9 @@ int I420Mirror(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -2778,8 +2704,7 @@ int NV12Mirror(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if ((!src_y && dst_y) || !src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } @@ -2787,8 +2712,8 @@ int NV12Mirror(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_uv = src_uv + (ptrdiff_t)(halfheight - 1) * src_stride_uv; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; src_stride_y = -src_stride_y; src_stride_uv = -src_stride_uv; } @@ -2812,14 +2737,13 @@ int ARGBMirror(const uint8_t* src_argb, int y; void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) = ARGBMirrorRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) 
{ return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBMIRRORROW_NEON) @@ -2883,14 +2807,13 @@ int RGB24Mirror(const uint8_t* src_rgb24, int y; void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = RGB24MirrorRow_C; - if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_rgb24 = src_rgb24 + (ptrdiff_t)(height - 1) * src_stride_rgb24; + src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } #if defined(HAS_RGB24MIRRORROW_NEON) @@ -2901,11 +2824,11 @@ int RGB24Mirror(const uint8_t* src_rgb24, } } #endif -#if defined(HAS_RGB24MIRRORROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - RGB24MirrorRow = RGB24MirrorRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - RGB24MirrorRow = RGB24MirrorRow_AVX2; +#if defined(HAS_RGB24MIRRORROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + RGB24MirrorRow = RGB24MirrorRow_SSSE3; } } #endif @@ -2932,19 +2855,18 @@ int ARGBBlend(const uint8_t* src_argb0, int y; void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) = ARGBBlendRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; @@ -2994,21 +2916,19 @@ int BlendPlane(const uint8_t* src_y0, void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) = BlendPlaneRow_C; - if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows for Y plane. if (src_stride_y0 == width && src_stride_y1 == width && - alpha_stride == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + alpha_stride == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0; @@ -3081,15 +3001,14 @@ int I420Blend(const uint8_t* src_y0, uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C; if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 || - !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { + !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } @@ -3171,7 +3090,7 @@ int I420Blend(const uint8_t* src_y0, } // Subsample 2 rows of UV to half width and half height. ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth); - alpha += (ptrdiff_t)alpha_stride * 2; + alpha += alpha_stride * 2; BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth); BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth); src_u0 += src_stride_u0; @@ -3198,19 +3117,18 @@ int ARGBMultiply(const uint8_t* src_argb0, int y; void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBMultiplyRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; @@ -3284,19 +3202,18 @@ int ARGBAdd(const uint8_t* src_argb0, int y; void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBAddRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; @@ -3370,19 +3287,18 @@ int ARGBSubtract(const uint8_t* src_argb0, int y; void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBSubtractRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; @@ -3449,19 +3365,17 @@ int RAWToRGB24(const uint8_t* src_raw, int y; void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) = RAWToRGB24Row_C; - if (!src_raw || !dst_rgb24 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_raw = src_raw + (ptrdiff_t)(height - 1) * src_stride_raw; + src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } // Coalesce rows. 
- if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) { width *= height; height = 1; src_stride_raw = dst_stride_rgb24 = 0; @@ -3519,16 +3433,16 @@ void SetPlane(uint8_t* dst_y, int y; void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { + if (width <= 0 || height == 0) { return; } if (height < 0) { height = -height; - dst_y = dst_y + (ptrdiff_t)(height - 1) * dst_stride_y; + dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. - if (dst_stride_y == width && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_y == width) { width *= height; height = 1; dst_stride_y = 0; @@ -3591,9 +3505,9 @@ int I420Rect(uint8_t* dst_y, uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); - if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN || x < 0 || y < 0 || value_y < 0 || value_y > 255 || - value_u < 0 || value_u > 255 || value_v < 0 || value_v > 255) { + if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 || + y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 || + value_v < 0 || value_v > 255) { return -1; } @@ -3615,18 +3529,17 @@ int ARGBRect(uint8_t* dst_argb, int y; void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) = ARGBSetRow_C; - if (!dst_argb || width <= 0 || height == 0 || height == INT_MIN || - dst_x < 0 || dst_y < 0) { + if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { return -1; } if (height < 0) { height = -height; - dst_argb = dst_argb + (ptrdiff_t)(height - 1) * dst_stride_argb; + dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } dst_argb += dst_y * dst_stride_argb + dst_x * 4; // Coalesce rows. 
- if (dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; @@ -3685,18 +3598,16 @@ int ARGBAttenuate(const uint8_t* src_argb, int y; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -3766,18 +3677,16 @@ int ARGBUnattenuate(const uint8_t* src_argb, int y; void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBUnattenuateRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -3819,18 +3728,16 @@ int ARGBGrayTo(const uint8_t* src_argb, int y; void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBGrayRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -3885,7 +3792,7 @@ int ARGBGray(uint8_t* dst_argb, return -1; } // Coalesce rows. - if (dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; @@ -3938,7 +3845,7 @@ int ARGBSepia(uint8_t* dst_argb, return -1; } // Coalesce rows. 
- if (dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; @@ -3990,18 +3897,16 @@ int ARGBColorMatrix(const uint8_t* src_argb, void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) = ARGBColorMatrixRow_C; - if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -4090,7 +3995,7 @@ int ARGBColorTable(uint8_t* dst_argb, return -1; } // Coalesce rows. - if (dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; @@ -4126,7 +4031,7 @@ int RGBColorTable(uint8_t* dst_argb, return -1; } // Coalesce rows. - if (dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; @@ -4171,7 +4076,7 @@ int ARGBQuantize(uint8_t* dst_argb, return -1; } // Coalesce rows. 
- if (dst_stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) { + if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; @@ -4256,13 +4161,12 @@ int ARGBBlur(const uint8_t* src_argb, int32_t* max_cumsum_bot_row; int32_t* cumsum_top_row; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } if (radius > height) { @@ -4357,18 +4261,16 @@ int ARGBShade(const uint8_t* src_argb, int y; void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) = ARGBShadeRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN || value == 0u) { + if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { return -1; } if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -4417,23 +4319,29 @@ int InterpolatePlane(const uint8_t* src0, void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - if (!src0 || !src1 || !dst || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src0 || !src1 || !dst || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - dst = dst + (ptrdiff_t)(height - 1) * dst_stride; + dst = dst + (height - 1) * dst_stride; dst_stride = -dst_stride; } // Coalesce rows. - if (src_stride0 == width && src_stride1 == width && dst_stride == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride0 == width && src_stride1 == width && dst_stride == width) { width *= height; height = 1; src_stride0 = src_stride1 = dst_stride = 0; } +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -4493,19 +4401,17 @@ int InterpolatePlane_16(const uint16_t* src0, void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; - if (!src0 || !src1 || !dst || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src0 || !src1 || !dst || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - dst = dst + (ptrdiff_t)(height - 1) * dst_stride; + dst = dst + (height - 1) * dst_stride; dst_stride = -dst_stride; } // Coalesce rows. 
- if (src_stride0 == width && src_stride1 == width && dst_stride == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride0 == width && src_stride1 == width && dst_stride == width) { width *= height; height = 1; src_stride0 = src_stride1 = dst_stride = 0; @@ -4600,8 +4506,7 @@ int I420Interpolate(const uint8_t* src0_y, int halfheight = (height + 1) >> 1; if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v || - !dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || - height == INT_MIN) { + !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } @@ -4626,19 +4531,17 @@ int ARGBShuffle(const uint8_t* src_argb, int y; void (*ARGBShuffleRow)(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) = ARGBShuffleRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -4659,14 +4562,6 @@ int ARGBShuffle(const uint8_t* src_argb, } } #endif -#if defined(HAS_ARGBSHUFFLEROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBShuffleRow = ARGBShuffleRow_Any_AVX512BW; - if (IS_ALIGNED(width, 32)) { - ARGBShuffleRow = ARGBShuffleRow_AVX512BW; - } - } -#endif #if defined(HAS_ARGBSHUFFLEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBShuffleRow = ARGBShuffleRow_Any_NEON; @@ -4712,19 +4607,17 @@ int AR64Shuffle(const uint16_t* src_ar64, int y; void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64, const uint8_t* shuffler, int width) = AR64ShuffleRow_C; - if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_ar64 = src_ar64 + (ptrdiff_t)(height - 1) * src_stride_ar64; + src_ar64 = src_ar64 + (height - 1) * src_stride_ar64; src_stride_ar64 = -src_stride_ar64; } // Coalesce rows. 
- if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) { width *= height; height = 1; src_stride_ar64 = dst_stride_ar64 = 0; @@ -4746,14 +4639,6 @@ int AR64Shuffle(const uint16_t* src_ar64, } } #endif -#if defined(HAS_ARGBSHUFFLEROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW)) { - AR64ShuffleRow = ARGBShuffleRow_Any_AVX512BW; - if (IS_ALIGNED(width, 16)) { - AR64ShuffleRow = ARGBShuffleRow_AVX512BW; - } - } -#endif #if defined(HAS_ARGBSHUFFLEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { AR64ShuffleRow = ARGBShuffleRow_Any_NEON; @@ -4791,13 +4676,13 @@ int GaussPlane_F32(const float* src, int width) = GaussCol_F32_C; void (*GaussRow_F32)(const float* src, float* dst, int width) = GaussRow_F32_C; - if (!src || !dst || width <= 0 || height == 0 || height == INT_MIN) { + if (!src || !dst || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src = src + (ptrdiff_t)(height - 1) * src_stride; + src = src + (height - 1) * src_stride; src_stride = -src_stride; } @@ -4860,76 +4745,83 @@ static int ARGBSobelize(const uint8_t* src_argb, uint8_t* dst, int width)) { int y; - void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) = - ARGBToYJRow_C; + void (*ARGBToYMatrixRow)(const uint8_t* src_argb, uint8_t* dst_y, int width, + const struct ArgbConstants* c) = ARGBToYMatrixRow_C; void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) = SobelYRow_C; void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobely, int width) = SobelXRow_C; const int kEdge = 16; // Extra pixels at start of row for extrude/align. 
- if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } -#if defined(HAS_ARGBTOYJROW_SSSE3) +#if defined(HAS_ARGBTOYMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; + ARGBToYMatrixRow = ARGBToYMatrixRow_SSSE3; } } #endif -#if defined(HAS_ARGBTOYJROW_AVX2) +#if defined(HAS_ARGBTOYMATRIXROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX2; } } #endif -#if defined(HAS_ARGBTOYROW_AVX512BW) +#if defined(HAS_ARGBTOYMATRIXROW_AVX512BW) if (TestCpuFlag(kCpuHasAVX512BW)) { - ARGBToYJRow = ARGBToYJRow_Any_AVX512BW; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_AVX512BW; if (IS_ALIGNED(width, 64)) { - ARGBToYJRow = ARGBToYJRow_AVX512BW; + ARGBToYMatrixRow = ARGBToYMatrixRow_AVX512BW; } } #endif -#if defined(HAS_ARGBTOYJROW_NEON) +#if defined(HAS_ARGBTOYMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_NEON; + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON; } } #endif -#if defined(HAS_ARGBTOYJROW_LSX) +#if defined(HAS_ARGBTOYMATRIXROW_NEON_DOTPROD) + if (TestCpuFlag(kCpuHasNeonDotProd)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_NEON_DotProd; + if (IS_ALIGNED(width, 16)) { + ARGBToYMatrixRow = ARGBToYMatrixRow_NEON_DotProd; + } + } +#endif +#if defined(HAS_ARGBTOYMATRIXROW_LSX) if 
(TestCpuFlag(kCpuHasLSX)) { - ARGBToYJRow = ARGBToYJRow_Any_LSX; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LSX; if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_LSX; + ARGBToYMatrixRow = ARGBToYMatrixRow_LSX; } } #endif -#if defined(HAS_ARGBTOYJROW_LASX) +#if defined(HAS_ARGBTOYMATRIXROW_LASX) if (TestCpuFlag(kCpuHasLASX)) { - ARGBToYJRow = ARGBToYJRow_Any_LASX; + ARGBToYMatrixRow = ARGBToYMatrixRow_Any_LASX; if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_LASX; + ARGBToYMatrixRow = ARGBToYMatrixRow_LASX; } } #endif -#if defined(HAS_ARGBTOYJROW_RVV) +#if defined(HAS_ARGBTOYMATRIXROW_RVV) if (TestCpuFlag(kCpuHasRVV)) { - ARGBToYJRow = ARGBToYJRow_RVV; + ARGBToYMatrixRow = ARGBToYMatrixRow_RVV; } #endif @@ -4967,10 +4859,10 @@ static int ARGBSobelize(const uint8_t* src_argb, uint8_t* row_y2 = row_y1 + row_size; if (!rows) return 1; - ARGBToYJRow(src_argb, row_y0, width); + ARGBToYMatrixRow(src_argb, row_y0, width, &kArgbJPEGConstants); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. - ARGBToYJRow(src_argb, row_y1, width); + ARGBToYMatrixRow(src_argb, row_y1, width, &kArgbJPEGConstants); row_y1[-1] = row_y1[0]; memset(row_y1 + width, row_y1[width - 1], 16); memset(row_y2 + width, 0, 16); @@ -4980,7 +4872,7 @@ static int ARGBSobelize(const uint8_t* src_argb, if (y < (height - 1)) { src_argb += src_stride_argb; } - ARGBToYJRow(src_argb, row_y2, width); + ARGBToYMatrixRow(src_argb, row_y2, width, &kArgbJPEGConstants); row_y2[-1] = row_y2[0]; row_y2[width] = row_y2[width - 1]; @@ -5130,19 +5022,17 @@ int ARGBPolynomial(const uint8_t* src_argb, int y; void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) = ARGBPolynomialRow_C; - if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -5180,7 +5070,7 @@ int HalfFloatPlane(const uint16_t* src_y, int y; void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale, int width) = HalfFloatRow_C; - if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } src_stride_y >>= 1; @@ -5188,12 +5078,11 @@ int HalfFloatPlane(const uint16_t* src_y, // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_y == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; @@ -5287,19 +5176,17 @@ int ARGBLumaColorTable(const uint8_t* src_argb, void (*ARGBLumaColorTableRow)( const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C; - if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -5329,19 +5216,17 @@ int ARGBCopyAlpha(const uint8_t* src_argb, int y; void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBCopyAlphaRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. - if (src_stride_argb == width * 4 && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; @@ -5379,18 +5264,17 @@ int ARGBExtractAlpha(const uint8_t* src_argb, int dst_stride_a, int width, int height) { - if (!src_argb || !dst_a || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_argb || !dst_a || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_argb += (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb += (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
- if (src_stride_argb == width * 4 && dst_stride_a == width && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_argb == width * 4 && dst_stride_a == width) { width *= height; height = 1; src_stride_argb = dst_stride_a = 0; @@ -5446,18 +5330,17 @@ int ARGBCopyYToAlpha(const uint8_t* src_y, int y; void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = ARGBCopyYToAlphaRow_C; - if (!src_y || !dst_argb || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; + src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } // Coalesce rows. - if (src_stride_y == width && dst_stride_argb == width * 4 && - (ptrdiff_t)width * height <= INT_MAX) { + if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; @@ -5506,15 +5389,14 @@ int YUY2ToNV12(const uint8_t* src_yuy2, YUY2ToYRow_C; void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C; - if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_yuy2 = src_yuy2 + (ptrdiff_t)(height - 1) * src_stride_yuy2; + src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } #if defined(HAS_YUY2TOYROW_SSE2) @@ -5615,15 +5497,14 @@ int UYVYToNV12(const uint8_t* src_uyvy, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; - if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0 || - height == INT_MIN) { + if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uyvy = src_uyvy + (ptrdiff_t)(height - 1) * src_stride_uyvy; + src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } #if defined(HAS_SPLITUVROW_SSE2) @@ -5664,6 +5545,14 @@ int UYVYToNV12(const uint8_t* src_uyvy, } #endif +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -5742,14 +5631,11 @@ void HalfMergeUVPlane(const uint8_t* src_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uv, int width) = HalfMergeUVRow_C; - if (width <= 0 || height == 0 || height == INT_MIN) { - return; - } // Negative height means invert the image. if (height < 0) { height = -height; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } diff --git a/source/rotate.cc b/source/rotate.cc index 60940f51f..d4a9fcd27 100644 --- a/source/rotate.cc +++ b/source/rotate.cc @@ -8,10 +8,9 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "libyuv/rotate.h" - #include -#include + +#include "libyuv/rotate.h" #include "libyuv/convert.h" #include "libyuv/cpu_id.h" @@ -129,7 +128,7 @@ void RotatePlane90(const uint8_t* src, // Rotate by 90 is a transpose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. - src += (ptrdiff_t)src_stride * (height - 1); + src += src_stride * (height - 1); src_stride = -src_stride; TransposePlane(src, src_stride, dst, dst_stride, width, height); } @@ -144,7 +143,7 @@ void RotatePlane270(const uint8_t* src, // Rotate by 270 is a transpose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. - dst += (ptrdiff_t)dst_stride * (width - 1); + dst += dst_stride * (width - 1); dst_stride = -dst_stride; TransposePlane(src, src_stride, dst, dst_stride, width, height); } @@ -161,8 +160,8 @@ void RotatePlane180(const uint8_t* src, assert(row); if (!row) return; - const uint8_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1); - uint8_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1); + const uint8_t* src_bot = src + src_stride * (height - 1); + uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; @@ -355,7 +354,7 @@ void SplitRotateUV90(const uint8_t* src, int dst_stride_b, int width, int height) { - src += (ptrdiff_t)src_stride * (height - 1); + src += src_stride * (height - 1); src_stride = -src_stride; SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, @@ -398,14 +397,9 @@ void SplitRotateUV180(const uint8_t* src, MirrorSplitUVRow = MirrorSplitUVRow_NEON; } #endif -#if defined(HAS_MIRRORSPLITUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { - MirrorSplitUVRow = MirrorSplitUVRow_AVX2; - } -#endif -#if 
defined(HAS_MIRRORSPLITUVROW_AVX512BW) - if (TestCpuFlag(kCpuHasAVX512BW) && IS_ALIGNED(width, 32)) { - MirrorSplitUVRow = MirrorSplitUVRow_AVX512BW; +#if defined(HAS_MIRRORSPLITUVROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) { + MirrorSplitUVRow = MirrorSplitUVRow_SSSE3; } #endif #if defined(HAS_MIRRORSPLITUVROW_LSX) @@ -437,15 +431,14 @@ int SplitRotateUV(const uint8_t* src_uv, int width, int height, enum RotationMode mode) { - if (!src_uv || width <= 0 || height == 0 || height == INT_MIN || !dst_u || - !dst_v) { + if (!src_uv || width <= 0 || height == 0 || !dst_u || !dst_v) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (ptrdiff_t)(height - 1) * src_stride_uv; + src_uv = src_uv + (height - 1) * src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -480,14 +473,14 @@ int RotatePlane(const uint8_t* src, int width, int height, enum RotationMode mode) { - if (!src || width <= 0 || height == 0 || height == INT_MIN || !dst) { + if (!src || width <= 0 || height == 0 || !dst) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src = src + (ptrdiff_t)(height - 1) * src_stride; + src = src + (height - 1) * src_stride; src_stride = -src_stride; } @@ -540,7 +533,7 @@ static void RotatePlane90_16(const uint16_t* src, // Rotate by 90 is a transpose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. - src += (ptrdiff_t)src_stride * (height - 1); + src += src_stride * (height - 1); src_stride = -src_stride; TransposePlane_16(src, src_stride, dst, dst_stride, width, height); } @@ -554,7 +547,7 @@ static void RotatePlane270_16(const uint16_t* src, // Rotate by 270 is a transpose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. 
- dst += (ptrdiff_t)dst_stride * (width - 1); + dst += dst_stride * (width - 1); dst_stride = -dst_stride; TransposePlane_16(src, src_stride, dst, dst_stride, width, height); } @@ -565,8 +558,8 @@ static void RotatePlane180_16(const uint16_t* src, int dst_stride, int width, int height) { - const uint16_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1); - uint16_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1); + const uint16_t* src_bot = src + src_stride * (height - 1); + uint16_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; @@ -598,14 +591,14 @@ int RotatePlane_16(const uint16_t* src, int width, int height, enum RotationMode mode) { - if (!src || width <= 0 || height == 0 || height == INT_MIN || !dst) { + if (!src || width <= 0 || height == 0 || !dst) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src = src + (ptrdiff_t)(height - 1) * src_stride; + src = src + (height - 1) * src_stride; src_stride = -src_stride; } @@ -648,7 +641,7 @@ int I420Rotate(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || width <= 0 || height == 0 || - height == INT_MIN || !dst_y || !dst_u || !dst_v) { + !dst_y || !dst_u || !dst_v) { return -1; } @@ -656,9 +649,9 @@ int I420Rotate(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -718,16 +711,16 @@ int I422Rotate(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 
1) >> 1; int r; - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - height == INT_MIN || !dst_y || !dst_u || !dst_v) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -813,17 +806,17 @@ int I444Rotate(const uint8_t* src_y, int width, int height, enum RotationMode mode) { - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - height == INT_MIN || !dst_y || !dst_u || !dst_v) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -873,8 +866,8 @@ int NV12ToI420Rotate(const uint8_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_y || !src_uv || width <= 0 || height == 0 || height == INT_MIN || - !dst_y || !dst_u || !dst_v) { + if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u || + !dst_v) { return -1; } @@ -882,8 +875,8 @@ int NV12ToI420Rotate(const uint8_t* src_y, if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_uv = src_uv + (ptrdiff_t)(halfheight - 1) * src_stride_uv; + src_y = src_y + (height - 1) * src_stride_y; + src_uv = src_uv + (halfheight - 1) * src_stride_uv; src_stride_y = -src_stride_y; src_stride_uv = -src_stride_uv; } @@ -950,16 +943,16 @@ int Android420ToI420Rotate(const uint8_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 || - height == 0 || height == INT_MIN) { + height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (halfheight - 1) * src_stride_u; + src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -1025,16 +1018,16 @@ int I010Rotate(const uint16_t* src_y, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_stride_y < 0) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -1096,16 +1089,16 @@ int I210Rotate(const uint16_t* src_y, int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; int r; - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - height == INT_MIN || !dst_y || !dst_u || !dst_v) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; @@ -1193,16 +1186,16 @@ int I410Rotate(const uint16_t* src_y, int width, int height, enum RotationMode mode) { - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_stride_y < 0) { + if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || + !dst_u || !dst_v || dst_stride_y < 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y; - src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u; - src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc index 8cfaed034..8c76ca919 100644 --- a/source/rotate_argb.cc +++ b/source/rotate_argb.cc @@ -10,8 +10,6 @@ #include "libyuv/rotate_argb.h" -#include - #include "libyuv/convert.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" @@ -224,15 +222,14 @@ int ARGBRotate(const uint8_t* src_argb, int width, int height, enum RotationMode mode) { - if (!src_argb || width <= 0 || height == 0 || height == INT_MIN || - !dst_argb) { + if (!src_argb || width <= 0 || height == 0 || !dst_argb) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb; + src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } diff --git a/source/rotate_common.cc b/source/rotate_common.cc index 899405651..e0341fec4 100644 --- a/source/rotate_common.cc +++ b/source/rotate_common.cc @@ -191,10 +191,10 @@ void Transpose4x4_32_C(const uint8_t* src, ((uint32_t*)(dst3))[1] = p31; ((uint32_t*)(dst3))[2] = p32; ((uint32_t*)(dst3))[3] = p33; - src += (ptrdiff_t)src_stride * 4; // advance 4 rows - src1 += (ptrdiff_t)src_stride * 4; - src2 += (ptrdiff_t)src_stride * 4; - src3 += (ptrdiff_t)src_stride * 4; + src += src_stride * 4; // advance 4 rows + src1 += src_stride * 4; + src2 += src_stride * 4; + src3 += src_stride * 4; dst += 4 * 4; // advance 4 columns dst1 += 4 * 4; dst2 += 4 * 4; diff --git a/source/rotate_neon.cc b/source/rotate_neon.cc index de14c41b0..27bd2251b 100644 --- a/source/rotate_neon.cc +++ b/source/rotate_neon.cc @@ -198,16 +198,16 @@ void Transpose4x4_32_NEON(const uint8_t* src, "vst1.8 {q3}, [%7]! 
\n" "bgt 1b \n" - : "+r"(src), // %0 - "+r"(src1), // %1 - "+r"(src2), // %2 - "+r"(src3), // %3 - "+r"(dst), // %4 - "+r"(dst1), // %5 - "+r"(dst2), // %6 - "+r"(dst3), // %7 - "+r"(width) // %8 - : "r"((ptrdiff_t)src_stride * 4) // %9 + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(dst1), // %5 + "+r"(dst2), // %6 + "+r"(dst3), // %7 + "+r"(width) // %8 + : "r"((ptrdiff_t)(src_stride * 4)) // %9 : "memory", "cc", "q0", "q1", "q2", "q3"); } diff --git a/source/rotate_neon64.cc b/source/rotate_neon64.cc index 14f31d94c..e09bcb178 100644 --- a/source/rotate_neon64.cc +++ b/source/rotate_neon64.cc @@ -252,16 +252,16 @@ void Transpose4x4_32_NEON(const uint8_t* src, "st1 {v2.4s}, [%6], 16 \n" "st1 {v3.4s}, [%7], 16 \n" "b.gt 1b \n" - : "+r"(src), // %0 - "+r"(src1), // %1 - "+r"(src2), // %2 - "+r"(src3), // %3 - "+r"(dst), // %4 - "+r"(dst1), // %5 - "+r"(dst2), // %6 - "+r"(dst3), // %7 - "+r"(width) // %8 - : "r"((ptrdiff_t)src_stride * 4) // %9 + : "+r"(src), // %0 + "+r"(src1), // %1 + "+r"(src2), // %2 + "+r"(src3), // %3 + "+r"(dst), // %4 + "+r"(dst1), // %5 + "+r"(dst2), // %6 + "+r"(dst3), // %7 + "+r"(width) // %8 + : "r"((ptrdiff_t)(src_stride * 4)) // %9 : "memory", "cc", "v0", "v1", "v2", "v3"); } diff --git a/source/rotate_win.cc b/source/rotate_win.cc index 5b40f62a0..03eeee3a6 100644 --- a/source/rotate_win.cc +++ b/source/rotate_win.cc @@ -64,7 +64,7 @@ __declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src, mov eax, ebp movdqa xmm7, xmm6 palignr xmm7, xmm7, 8 - // Second round of bit swap. + // Second round of bit swap. punpcklwd xmm0, xmm2 punpcklwd xmm1, xmm3 movdqa xmm2, xmm0 @@ -77,8 +77,8 @@ __declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src, movdqa xmm7, xmm5 palignr xmm6, xmm6, 8 palignr xmm7, xmm7, 8 - // Third round of bit swap. - // Write to the destination pointer. + // Third round of bit swap. + // Write to the destination pointer. 
punpckldq xmm0, xmm4 movq qword ptr [edx], xmm0 movdqa xmm4, xmm0 @@ -173,7 +173,7 @@ __declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src, movdqa xmm7, xmm5 lea eax, [eax + 8 * edi + 16] neg edi - // Second round of bit swap. + // Second round of bit swap. movdqa xmm5, xmm0 punpcklwd xmm0, xmm2 punpckhwd xmm5, xmm2 @@ -193,8 +193,8 @@ __declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src, punpckhwd xmm6, xmm7 movdqa xmm7, xmm6 - // Third round of bit swap. - // Write to the destination pointer. + // Third round of bit swap. + // Write to the destination pointer. movdqa xmm6, xmm0 punpckldq xmm0, xmm4 punpckhdq xmm6, xmm4 diff --git a/source/row_any.cc b/source/row_any.cc index 919b231e6..4ae858560 100644 --- a/source/row_any.cc +++ b/source/row_any.cc @@ -10,6 +10,7 @@ #include "libyuv/row.h" +#include #include // For memset. #include "libyuv/basic_types.h" @@ -387,12 +388,6 @@ ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15) #ifdef HAS_I422TORGB24ROW_AVX2 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) #endif -#ifdef HAS_I422TORGB24ROW_AVX512VBMI -ANY31C(I422ToRGB24Row_Any_AVX512VBMI, I422ToRGB24Row_AVX512VBMI, 1, 0, 3, 31) -#endif -#ifdef HAS_I422TORGB24ROW_AVX512BW -ANY31C(I422ToRGB24Row_Any_AVX512BW, I422ToRGB24Row_AVX512BW, 1, 0, 3, 31) -#endif #ifdef HAS_I422TOARGBROW_AVX2 ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) #endif @@ -951,7 +946,9 @@ ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31) #if defined(HAS_ARGBTORGB24ROW_SSSE3) ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15) ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15) - +ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) +ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) +ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) #endif #if defined(HAS_ARGBTORGB24ROW_AVX2) ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31) @@ -987,9 
+984,8 @@ ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7) #if defined(HAS_ARGBTOAR30ROW_AVX2) ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) #endif - -#if defined(HAS_J400TOARGBROW_AVX512BW) -ANY11(J400ToARGBRow_Any_AVX512BW, J400ToARGBRow_AVX512BW, 0, 1, 4, 31) +#if defined(HAS_J400TOARGBROW_SSE2) +ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7) #endif #if defined(HAS_J400TOARGBROW_AVX2) ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) @@ -997,14 +993,13 @@ ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) #if defined(HAS_RGB24TOARGBROW_SSSE3) ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) - +ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) +ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) +ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) #endif #if defined(HAS_RAWTOARGBROW_AVX2) ANY11(RAWToARGBRow_Any_AVX2, RAWToARGBRow_AVX2, 0, 3, 4, 31) #endif -#if defined(HAS_RGB24TOARGBROW_AVX2) -ANY11(RGB24ToARGBRow_Any_AVX2, RGB24ToARGBRow_AVX2, 0, 3, 4, 31) -#endif #if defined(HAS_RAWTOARGBROW_AVX512BW) ANY11(RAWToARGBRow_Any_AVX512BW, RAWToARGBRow_AVX512BW, 0, 3, 4, 63) #endif @@ -1420,8 +1415,8 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) // Any 1 to 1 with parameter. 
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ - SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]); \ - SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]); \ + SIMD_ALIGNED(uint8_t vin[64]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -1467,6 +1462,14 @@ ANY11P(I400ToARGBRow_Any_LSX, 15) #endif +#if defined(HAS_ARGBTORGB565DITHERROW_SSE2) +ANY11P(ARGBToRGB565DitherRow_Any_SSE2, + ARGBToRGB565DitherRow_SSE2, + const uint32_t, + 4, + 2, + 3) +#endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, @@ -1505,14 +1508,6 @@ ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) #ifdef HAS_ARGBSHUFFLEROW_AVX2 ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15) #endif -#ifdef HAS_ARGBSHUFFLEROW_AVX512BW -ANY11P(ARGBShuffleRow_Any_AVX512BW, - ARGBShuffleRow_AVX512BW, - const uint8_t*, - 4, - 4, - 31) -#endif #ifdef HAS_ARGBSHUFFLEROW_NEON ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) #endif @@ -1835,9 +1830,18 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) memcpy(dst_ptr + np * BPP, vout, r * BPP * sizeof(TD)); \ } -#if defined(HAS_INTERPOLATEROW_AVX2) +#ifdef HAS_INTERPOLATEROW_AVX2 ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31) #endif +#ifdef HAS_INTERPOLATEROW_SSSE3 +ANY11I(InterpolateRow_Any_SSSE3, + InterpolateRow_SSSE3, + uint8_t, + uint8_t, + 1, + 1, + 15) +#endif #ifdef HAS_INTERPOLATEROW_NEON ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15) #endif @@ -1854,15 +1858,6 @@ ANY11I(InterpolateRow_16_Any_NEON, 1, 7) #endif -#ifdef HAS_INTERPOLATEROW_16_AVX2 -ANY11I(InterpolateRow_16_Any_AVX2, - InterpolateRow_16_AVX2, - uint16_t, - uint16_t, - 1, - 1, - 15) -#endif #undef ANY11I 
// Any 1 to 1 interpolate with scale param @@ -1911,8 +1906,8 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2, // Any 1 to 1 mirror. #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ - SIMD_ALIGNED(uint8_t vin[128]); \ - SIMD_ALIGNED(uint8_t vout[128]); \ + SIMD_ALIGNED(uint8_t vin[64]); \ + SIMD_ALIGNED(uint8_t vout[64]); \ memset(vin, 0, sizeof(vin)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -1920,14 +1915,11 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2, ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ } \ ptrdiff_t np = n; \ - memcpy(vin, src_ptr, r * BPP); \ + memcpy(vin, src_ptr, r* BPP); \ ANY_SIMD(vin, vout, MASK + 1); \ memcpy(dst_ptr + np * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \ } -#ifdef HAS_MIRRORROW_AVX512BW -ANY11M(MirrorRow_Any_AVX512BW, MirrorRow_AVX512BW, 1, 63) -#endif #ifdef HAS_MIRRORROW_AVX2 ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) #endif @@ -1946,6 +1938,9 @@ ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63) #ifdef HAS_MIRRORUVROW_AVX2 ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15) #endif +#ifdef HAS_MIRRORUVROW_SSSE3 +ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7) +#endif #ifdef HAS_MIRRORUVROW_NEON ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31) #endif @@ -1970,8 +1965,8 @@ ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7) #ifdef HAS_ARGBMIRRORROW_LASX ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15) #endif -#ifdef HAS_RGB24MIRRORROW_AVX2 -ANY11M(RGB24MirrorRow_Any_AVX2, RGB24MirrorRow_AVX2, 3, 31) +#ifdef HAS_RGB24MIRRORROW_SSSE3 +ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15) #endif #ifdef HAS_RGB24MIRRORROW_NEON ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15) @@ -2031,9 +2026,6 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3) #ifdef HAS_SPLITUVROW_SSE2 ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15) #endif -#ifdef HAS_SPLITUVROW_AVX512BW 
-ANY12(SplitUVRow_Any_AVX512BW, SplitUVRow_AVX512BW, 0, 2, 0, 63) -#endif #ifdef HAS_SPLITUVROW_AVX2 ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #endif @@ -2205,7 +2197,7 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15) uint8_t* dst_v, int width) { \ SIMD_ALIGNED(uint8_t vin[256 * 2]); \ SIMD_ALIGNED(uint8_t vout[256 * 2]); \ - memset(vin, 0, sizeof(vin)); /* for msan */ \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ memset(vout, 0, sizeof(vout)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -2227,29 +2219,29 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15) memcpy(dst_v + (np >> 1), vout + 256, SS(r, 1)); \ } -#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ - int width, const struct ArgbConstants* c) { \ - SIMD_ALIGNED(uint8_t vin[256]); \ - SIMD_ALIGNED(uint8_t vout[256 * 2]); \ - memset(vin, 0, sizeof(vin)); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \ - } \ - memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \ - ANY_SIMD(vin, vout, vout + 256, MASK + 1, c); \ - memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \ - memcpy(dst_v + (ptrdiff_t)n, vout + 256, (ptrdiff_t)r); \ +#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ + int width, const struct ArgbConstants* c) { \ + SIMD_ALIGNED(uint8_t vin[256]); \ + SIMD_ALIGNED(uint8_t vout[256 * 2]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \ + } \ + memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \ + ANY_SIMD(vin, vout, vout + 256, MASK + 1, c); \ + memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \ + memcpy(dst_v + (ptrdiff_t)n, vout + 256, (ptrdiff_t)r); \ } #define ANY12MS(NAMEANY, ANY_SIMD, 
UVSHIFT, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \ - uint8_t* dst_v, int width, const struct ArgbConstants* c) { \ + void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \ + uint8_t* dst_v, int width, const struct ArgbConstants* c) { \ SIMD_ALIGNED(uint8_t vin[256 * 2]); \ SIMD_ALIGNED(uint8_t vout[256 * 2]); \ - memset(vin, 0, sizeof(vin)); /* for msan */ \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ memset(vout, 0, sizeof(vout)); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ @@ -2277,35 +2269,12 @@ ANY12MS(ARGBToUVMatrixRow_Any_NEON, ARGBToUVMatrixRow_NEON, 0, 4, 15) #ifdef HAS_ARGBTOUVMATRIXROW_NEON_I8MM ANY12MS(ARGBToUVMatrixRow_Any_NEON_I8MM, ARGBToUVMatrixRow_NEON_I8MM, 0, 4, 15) #endif -#ifdef HAS_RGBTOUVMATRIXROW_NEON -ANY12MS(RGBToUVMatrixRow_Any_NEON, RGBToUVMatrixRow_NEON, 0, 3, 15) -#endif -#ifdef HAS_RGB565TOUVMATRIXROW_NEON -ANY12MS(RGB565ToUVMatrixRow_Any_NEON, RGB565ToUVMatrixRow_NEON, 0, 2, 15) -#endif -#ifdef HAS_ARGB1555TOUVMATRIXROW_NEON -ANY12MS(ARGB1555ToUVMatrixRow_Any_NEON, ARGB1555ToUVMatrixRow_NEON, 0, 2, 15) -#endif -#ifdef HAS_ARGB4444TOUVMATRIXROW_NEON -ANY12MS(ARGB4444ToUVMatrixRow_Any_NEON, ARGB4444ToUVMatrixRow_NEON, 0, 2, 15) -#endif #ifdef HAS_ARGBTOUVMATRIXROW_AVX2 -ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 31) -ANY12MS(RGBToUVMatrixRow_Any_AVX2, RGBToUVMatrixRow_AVX2, 0, 3, 31) -ANY12MS(RGB565ToUVMatrixRow_Any_AVX2, RGB565ToUVMatrixRow_AVX2, 0, 2, 31) -#ifdef HAS_ARGB1555TOARGBROW_AVX2 -ANY12MS(ARGB1555ToUVMatrixRow_Any_AVX2, ARGB1555ToUVMatrixRow_AVX2, 0, 2, 31) -#endif -#ifdef HAS_ARGB4444TOARGBROW_AVX2 -ANY12MS(ARGB4444ToUVMatrixRow_Any_AVX2, ARGB4444ToUVMatrixRow_AVX2, 0, 2, 31) -#endif +ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 15) #endif #ifdef HAS_ARGBTOUVMATRIXROW_AVX512BW ANY12MS(ARGBToUVMatrixRow_Any_AVX512BW, ARGBToUVMatrixRow_AVX512BW, 0, 4, 63) #endif -#ifdef 
HAS_RGBTOUVMATRIXROW_AVX512BW -ANY12MS(RGBToUVMatrixRow_Any_AVX512BW, RGBToUVMatrixRow_AVX512BW, 0, 3, 63) -#endif #ifdef HAS_ARGBTOUVMATRIXROW_SSSE3 ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7) #endif @@ -2322,20 +2291,20 @@ ANY12M(ARGBToUV444MatrixRow_Any_SSSE3, ARGBToUV444MatrixRow_SSSE3, 4, 15) ANY12M(ARGBToUV444MatrixRow_Any_NEON, ARGBToUV444MatrixRow_NEON, 4, 7) #endif -#define ANY11MC(NAMEANY, ANY_SIMD, BPP, MASK) \ - void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width, \ - const struct ArgbConstants* c) { \ - SIMD_ALIGNED(uint8_t vin[256]); \ - SIMD_ALIGNED(uint8_t vout[256]); \ - memset(vin, 0, sizeof(vin)); /* for msan */ \ - int r = width & MASK; \ - int n = width & ~MASK; \ - if (n > 0) { \ - ANY_SIMD(src_ptr, dst_ptr, n, c); \ - } \ - memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \ - ANY_SIMD(vin, vout, MASK + 1, c); \ - memcpy(dst_ptr + (ptrdiff_t)n, vout, (ptrdiff_t)r); \ +#define ANY11MC(NAMEANY, ANY_SIMD, BPP, MASK) \ + void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width, \ + const struct ArgbConstants* c) { \ + SIMD_ALIGNED(uint8_t vin[256]); \ + SIMD_ALIGNED(uint8_t vout[256]); \ + memset(vin, 0, sizeof(vin)); /* for msan */ \ + int r = width & MASK; \ + int n = width & ~MASK; \ + if (n > 0) { \ + ANY_SIMD(src_ptr, dst_ptr, n, c); \ + } \ + memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \ + ANY_SIMD(vin, vout, MASK + 1, c); \ + memcpy(dst_ptr + (ptrdiff_t)n, vout, (ptrdiff_t)r); \ } #ifdef HAS_ARGBTOYROW_SSSE3 @@ -2343,14 +2312,6 @@ ANY11MC(ARGBToYMatrixRow_Any_SSSE3, ARGBToYMatrixRow_SSSE3, 4, 15) #endif #ifdef HAS_ARGBTOYROW_AVX2 ANY11MC(ARGBToYMatrixRow_Any_AVX2, ARGBToYMatrixRow_AVX2, 4, 31) -ANY11MC(RGBToYMatrixRow_Any_AVX2, RGBToYMatrixRow_AVX2, 3, 31) -ANY11MC(RGB565ToYMatrixRow_Any_AVX2, RGB565ToYMatrixRow_AVX2, 2, 31) -#ifdef HAS_ARGB1555TOYMATRIXROW_AVX2 -ANY11MC(ARGB1555ToYMatrixRow_Any_AVX2, ARGB1555ToYMatrixRow_AVX2, 2, 31) -#endif -#ifdef 
HAS_ARGB4444TOYMATRIXROW_AVX2 -ANY11MC(ARGB4444ToYMatrixRow_Any_AVX2, ARGB4444ToYMatrixRow_AVX2, 2, 31) -#endif #endif #ifdef HAS_ARGBTOYROW_AVX512BW ANY11MC(ARGBToYMatrixRow_Any_AVX512BW, ARGBToYMatrixRow_AVX512BW, 4, 63) @@ -2361,18 +2322,6 @@ ANY11MC(ARGBToYMatrixRow_Any_NEON, ARGBToYMatrixRow_NEON, 4, 15) #ifdef HAS_ARGBTOYMATRIXROW_NEON_DOTPROD ANY11MC(ARGBToYMatrixRow_Any_NEON_DotProd, ARGBToYMatrixRow_NEON_DotProd, 4, 15) #endif -#ifdef HAS_RGBTOYMATRIXROW_NEON -ANY11MC(RGBToYMatrixRow_Any_NEON, RGBToYMatrixRow_NEON, 3, 15) -#endif -#ifdef HAS_RGB565TOYMATRIXROW_NEON -ANY11MC(RGB565ToYMatrixRow_Any_NEON, RGB565ToYMatrixRow_NEON, 2, 15) -#endif -#ifdef HAS_ARGB1555TOYMATRIXROW_NEON -ANY11MC(ARGB1555ToYMatrixRow_Any_NEON, ARGB1555ToYMatrixRow_NEON, 2, 15) -#endif -#ifdef HAS_ARGB4444TOYMATRIXROW_NEON -ANY11MC(ARGB4444ToYMatrixRow_Any_NEON, ARGB4444ToYMatrixRow_NEON, 2, 15) -#endif #ifdef HAS_ARGBTOYMATRIXROW_LSX ANY11MC(ARGBToYMatrixRow_Any_LSX, ARGBToYMatrixRow_LSX, 4, 15) #endif diff --git a/source/row_common.cc b/source/row_common.cc index 70ceaf5c8..b2a0ec12b 100644 --- a/source/row_common.cc +++ b/source/row_common.cc @@ -14,7 +14,7 @@ #include // For memcpy and memset. #include "libyuv/basic_types.h" -#include "libyuv/convert_argb.h" // For kYuvI601Constants +#include "libyuv/convert_argb.h" // For kYuvI601Constants #include "libyuv/convert_from_argb.h" // For ArgbConstants #ifdef __cplusplus @@ -37,6 +37,10 @@ extern "C" { // LIBYUV_UNLIMITED_BT709 // LIBYUV_UNLIMITED_BT2020 +#if defined(LIBYUV_BIT_EXACT) +#define LIBYUV_UNATTENUATE_DUP 1 +#endif + // llvm x86 is poor at ternary operator, so use branchless min/max. 
#define USE_BRANCHLESS 1 @@ -749,31 +753,28 @@ MAKEROWYJ(ABGR, 0, 1, 2, 4) MAKEROWYJ(RGBA, 3, 2, 1, 4) #undef MAKEROWYJ -static __inline uint8_t RGBToYMatrix(uint8_t b0, - uint8_t b1, - uint8_t b2, - uint8_t b3, +static __inline uint8_t RGBToYMatrix(uint8_t r, + uint8_t g, + uint8_t b, const struct ArgbConstants* c) { - return (c->kRGBToY[0] * b0 + c->kRGBToY[1] * b1 + c->kRGBToY[2] * b2 + - c->kRGBToY[3] * b3 + c->kAddY[0]) >> + return (c->kRGBToY[2] * r + c->kRGBToY[1] * g + c->kRGBToY[0] * b + + c->kAddY[0]) >> 8; } -static __inline uint8_t RGBToUMatrix(uint8_t b0, - uint8_t b1, - uint8_t b2, - uint8_t b3, +static __inline uint8_t RGBToUMatrix(uint8_t r, + uint8_t g, + uint8_t b, const struct ArgbConstants* c) { - return (c->kAddUV[0] - (c->kRGBToU[0] * b0 + c->kRGBToU[1] * b1 + - c->kRGBToU[2] * b2 + c->kRGBToU[3] * b3)) >> + return (c->kAddUV[0] - + (c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b)) >> 8; } -static __inline uint8_t RGBToVMatrix(uint8_t b0, - uint8_t b1, - uint8_t b2, - uint8_t b3, +static __inline uint8_t RGBToVMatrix(uint8_t r, + uint8_t g, + uint8_t b, const struct ArgbConstants* c) { - return (c->kAddUV[0] - (c->kRGBToV[0] * b0 + c->kRGBToV[1] * b1 + - c->kRGBToV[2] * b2 + c->kRGBToV[3] * b3)) >> + return (c->kAddUV[0] - + (c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b)) >> 8; } @@ -783,8 +784,7 @@ void ARGBToYMatrixRow_C(const uint8_t* src_argb, const struct ArgbConstants* c) { int x; for (x = 0; x < width; ++x) { - dst_y[0] = - RGBToYMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c); + dst_y[0] = RGBToYMatrix(src_argb[2], src_argb[1], src_argb[0], c); src_argb += 4; dst_y += 1; } @@ -799,28 +799,25 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb, const uint8_t* src_argb1 = src_argb + src_stride_argb; int x; for (x = 0; x < width - 1; x += 2) { - uint8_t b0 = + uint8_t ab = (src_argb[0] + src_argb[4] + src_argb1[0] + src_argb1[4] + 2) >> 2; - uint8_t b1 = + uint8_t ag = (src_argb[1] + src_argb[5] + 
src_argb1[1] + src_argb1[5] + 2) >> 2; - uint8_t b2 = + uint8_t ar = (src_argb[2] + src_argb[6] + src_argb1[2] + src_argb1[6] + 2) >> 2; - uint8_t b3 = - (src_argb[3] + src_argb[7] + src_argb1[3] + src_argb1[7] + 2) >> 2; - dst_u[0] = RGBToUMatrix(b0, b1, b2, b3, c); - dst_v[0] = RGBToVMatrix(b0, b1, b2, b3, c); + dst_u[0] = RGBToUMatrix(ar, ag, ab, c); + dst_v[0] = RGBToVMatrix(ar, ag, ab, c); src_argb += 8; src_argb1 += 8; dst_u += 1; dst_v += 1; } if (width & 1) { - uint8_t b0 = (src_argb[0] + src_argb1[0] + 1) >> 1; - uint8_t b1 = (src_argb[1] + src_argb1[1] + 1) >> 1; - uint8_t b2 = (src_argb[2] + src_argb1[2] + 1) >> 1; - uint8_t b3 = (src_argb[3] + src_argb1[3] + 1) >> 1; - dst_u[0] = RGBToUMatrix(b0, b1, b2, b3, c); - dst_v[0] = RGBToVMatrix(b0, b1, b2, b3, c); + uint8_t ab = (src_argb[0] + src_argb1[0] + 1) >> 1; + uint8_t ag = (src_argb[1] + src_argb1[1] + 1) >> 1; + uint8_t ar = (src_argb[2] + src_argb1[2] + 1) >> 1; + dst_u[0] = RGBToUMatrix(ar, ag, ab, c); + dst_v[0] = RGBToVMatrix(ar, ag, ab, c); } } @@ -831,10 +828,11 @@ void ARGBToUV444MatrixRow_C(const uint8_t* src_argb, const struct ArgbConstants* c) { int x; for (x = 0; x < width; ++x) { - dst_u[0] = - RGBToUMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c); - dst_v[0] = - RGBToVMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c); + uint8_t ab = src_argb[0]; + uint8_t ag = src_argb[1]; + uint8_t ar = src_argb[2]; + dst_u[0] = RGBToUMatrix(ar, ag, ab, c); + dst_v[0] = RGBToVMatrix(ar, ag, ab, c); src_argb += 4; dst_u += 1; dst_v += 1; @@ -1514,18 +1512,18 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \ YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB); -#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \ - extern const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \ - ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \ - -(RV), 
0, AY, AUV); \ - extern const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \ - ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \ - -(BV), 0, AY, AUV); \ - extern const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \ - ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \ - -(GV), -(RV), AY, AUV); \ - extern const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \ - ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \ +#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \ + const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \ + ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \ + -(RV), 0, AY, AUV); \ + const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \ + ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \ + -(BV), 0, AY, AUV); \ + const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \ + ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \ + -(GV), -(RV), AY, AUV); \ + const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \ + ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \ -(GV), -(BV), AY, AUV); // BT.601 limited range RGB to YUV coefficients @@ -3468,7 +3466,7 @@ void ARGBBlendRow_C(const uint8_t* src_argb, } #undef BLEND -#define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8 +#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8 void BlendPlaneRow_C(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, @@ -3575,8 +3573,12 @@ const uint32_t fixed_invtbl8[256] = { T(0xfc), T(0xfd), T(0xfe), 0x01000100}; #undef T +#if defined(LIBYUV_UNATTENUATE_DUP) // This code mimics the Intel SIMD version for better testability. 
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16) +#else +#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8) +#endif // mimics the Intel SIMD code for exactness. void ARGBUnattenuateRow_C(const uint8_t* src_argb, @@ -3664,8 +3666,7 @@ void ARGBAffineRow_C(const uint8_t* src_argb, int x = (int)(uv[0]); int y = (int)(uv[1]); *(uint32_t*)(dst_argb) = - *(const uint32_t*)(src_argb + (ptrdiff_t)y * src_argb_stride + - (ptrdiff_t)x * 4); + *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4); dst_argb += 4; uv[0] += uv_dudv[2]; uv[1] += uv_dudv[3]; @@ -4171,7 +4172,7 @@ void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, } #endif -#if defined(HAS_NV12TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2) +#if defined(HAS_NV12TORGB24ROW_AVX2) void NV12ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, @@ -4182,7 +4183,11 @@ void NV12ToRGB24Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif src_y += twidth; src_uv += twidth; dst_rgb24 += twidth * 3; @@ -4191,7 +4196,7 @@ void NV12ToRGB24Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_NV21TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2) +#if defined(HAS_NV21TORGB24ROW_AVX2) void NV21ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, @@ -4202,7 +4207,11 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif src_y += twidth; src_vu += twidth; dst_rgb24 += twidth * 3; @@ -4211,7 +4220,7 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTORGB565ROW_AVX2) +#if defined(HAS_I422TORGB565ROW_AVX2) void I422ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4222,7 +4231,11 @@ void I422ToRGB565Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB565ROW_AVX2) ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); +#else + ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); +#endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; @@ -4232,7 +4245,7 @@ void I422ToRGB565Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTOARGB1555ROW_AVX2) +#if defined(HAS_I422TOARGB1555ROW_AVX2) void I422ToARGB1555Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4244,7 +4257,11 @@ void I422ToARGB1555Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTOARGB1555ROW_AVX2) ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); +#else + ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); +#endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; @@ -4254,7 +4271,7 @@ void I422ToARGB1555Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTOARGB4444ROW_AVX2) +#if defined(HAS_I422TOARGB4444ROW_AVX2) void I422ToARGB4444Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4266,7 +4283,11 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTOARGB4444ROW_AVX2) ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); +#else + ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); +#endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; @@ -4276,7 +4297,7 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2) +#if defined(HAS_I422TORGB24ROW_AVX2) void I422ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4288,7 +4309,11 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; @@ -4298,51 +4323,7 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX512VBMI) -void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, twidth); - ARGBToRGB24Row_AVX512VBMI(row, dst_rgb24, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb24 += twidth * 3; - width -= twidth; - } -} -#endif - -#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX2) -void I422ToRGB24Row_AVX512BW(const uint8_t* src_y, - const uint8_t* src_u, - const uint8_t* src_v, - uint8_t* dst_rgb24, - const struct YuvConstants* yuvconstants, - int width) { - // Row buffer for intermediate ARGB pixels. - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, twidth); - ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); - src_y += twidth; - src_u += twidth / 2; - src_v += twidth / 2; - dst_rgb24 += twidth * 3; - width -= twidth; - } -} -#endif - -#if defined(HAS_I444TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2) +#if defined(HAS_I444TORGB24ROW_AVX2) void I444ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, @@ -4354,7 +4335,11 @@ void I444ToRGB24Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); +#else + ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); +#endif src_y += twidth; src_u += twidth; src_v += twidth; @@ -4364,7 +4349,7 @@ void I444ToRGB24Row_AVX2(const uint8_t* src_y, } #endif -#if defined(HAS_NV12TOARGBROW_AVX2) && defined(HAS_ARGBTORGB565ROW_AVX2) +#if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, @@ -4375,7 +4360,11 @@ void NV12ToRGB565Row_AVX2(const uint8_t* src_y, while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); +#if defined(HAS_ARGBTORGB565ROW_AVX2) ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); +#else + ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); +#endif src_y += twidth; src_uv += twidth; dst_rgb565 += twidth * 2; @@ -4384,6 +4373,26 @@ void NV12ToRGB565Row_AVX2(const uint8_t* src_y, } #endif +#ifdef HAS_RGB24TOYJROW_AVX2 +// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values. +} +#endif // HAS_RGB24TOYJROW_AVX2 + +#ifdef HAS_RAWTOYJROW_AVX2 +// Convert 32 RAW pixels (128 bytes) to 32 YJ values. +} +#endif // HAS_RAWTOYJROW_AVX2 + +#ifdef HAS_RGB24TOYJROW_SSSE3 +// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values. 
+} +#endif // HAS_RGB24TOYJROW_SSSE3 + +#ifdef HAS_RAWTOYJROW_SSSE3 +// Convert 16 RAW pixels (64 bytes) to 16 YJ values. +} +#endif // HAS_RAWTOYJROW_SSSE3 + #ifdef HAS_INTERPOLATEROW_16TO8_AVX2 void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr, const uint16_t* src_ptr, @@ -4395,7 +4404,7 @@ void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr, SIMD_ALIGNED(uint16_t row[MAXTWIDTH]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - InterpolateRow_16_AVX2(row, src_ptr, src_stride, twidth, source_y_fraction); + InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction); Convert16To8Row_AVX2(row, dst_ptr, scale, twidth); src_ptr += twidth; dst_ptr += twidth; @@ -4601,465 +4610,6 @@ void HalfMergeUVRow_C(const uint8_t* src_u, #undef STATIC_CAST -void RGBToYMatrixRow_C(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB24ToARGBRow_C(src_rgb, row, twidth); - ARGBToYMatrixRow_C(row, dst_y, twidth, c); - src_rgb += twidth * 3; - dst_y += twidth; - width -= twidth; - } -} - -void RGBToUVMatrixRow_C(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - RGB24ToARGBRow_C(src_rgb, row, twidth); - RGB24ToARGBRow_C(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb += twidth * 3; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} - -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2) -void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB24ToARGBRow_AVX2(src_rgb, row, twidth); - ARGBToYMatrixRow_AVX2(row, dst_y, twidth, c); - src_rgb += twidth * 3; - dst_y += twidth; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2) -void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB24ToARGBRow_AVX2(src_rgb, row, twidth); - RGB24ToARGBRow_AVX2(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb += twidth * 3; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) && \ - defined(HAS_RGB24TOARGBROW_AVX512BW) -void RGBToUVMatrixRow_AVX512BW(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - RGB24ToARGBRow_AVX512BW(src_rgb, row, twidth); - RGB24ToARGBRow_AVX512BW(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4, - twidth); - ARGBToUVMatrixRow_AVX512BW(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb += twidth * 3; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_RGB24TOARGBROW_NEON) -void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb, - int src_stride_rgb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB24ToARGBRow_NEON(src_rgb, row, twidth); - RGB24ToARGBRow_NEON(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb += twidth * 3; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB565ToARGBRow_C(src_rgb565, row, twidth); - ARGBToYMatrixRow_C(row, dst_y, twidth, c); - src_rgb565 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} - -void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - RGB565ToARGBRow_C(src_rgb565, row, twidth); - RGB565ToARGBRow_C(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4, - twidth); - ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb565 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} - -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2) -void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB565ToARGBRow_AVX2(src_rgb565, row, twidth); - ARGBToYMatrixRow_AVX2(row, dst_y, twidth, c); - src_rgb565 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2) -void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB565ToARGBRow_AVX2(src_rgb565, row, twidth); - RGB565ToARGBRow_AVX2(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4, - twidth); - ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb565 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOYMATRIXROW_NEON) -void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - RGB565ToARGBRow_NEON(src_rgb565, row, twidth); - ARGBToYMatrixRow_NEON(row, dst_y, twidth, c); - src_rgb565 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} -#endif - -#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOUVMATRIXROW_NEON) -void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565, - int src_stride_rgb565, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - RGB565ToARGBRow_NEON(src_rgb565, row, twidth); - RGB565ToARGBRow_NEON(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4, - twidth); - ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_rgb565 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB1555ToARGBRow_C(src_argb1555, row, twidth); - ARGBToYMatrixRow_C(row, dst_y, twidth, c); - src_argb1555 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} - -void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - ARGB1555ToARGBRow_C(src_argb1555, row, twidth); - ARGB1555ToARGBRow_C(src_argb1555 + src_stride_argb1555, row + MAXTWIDTH * 4, - twidth); - ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_argb1555 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} - -void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB4444ToARGBRow_C(src_argb4444, row, twidth); - ARGBToYMatrixRow_C(row, dst_y, twidth, c); - src_argb4444 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} - -void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB4444ToARGBRow_C(src_argb4444, row, twidth); - ARGB4444ToARGBRow_C(src_argb4444 + src_stride_argb4444, row + MAXTWIDTH * 4, - twidth); - ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_argb4444 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} - -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) -#if defined(HAS_ARGB1555TOARGBROW_AVX2) -void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - ARGB1555ToARGBRow_AVX2(src_argb1555, row, twidth); - ARGBToYMatrixRow_AVX2(row, dst_y, twidth, c); - src_argb1555 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGB4444TOARGBROW_AVX2) -void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB4444ToARGBRow_AVX2(src_argb4444, row, twidth); - ARGBToYMatrixRow_AVX2(row, dst_y, twidth, c); - src_argb4444 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} -#endif -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) -#if defined(HAS_ARGB1555TOARGBROW_AVX2) -void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB1555ToARGBRow_AVX2(src_argb1555, row, twidth); - ARGB1555ToARGBRow_AVX2(src_argb1555 + src_stride_argb1555, - row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_argb1555 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGB4444TOARGBROW_AVX2) -void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - ARGB4444ToARGBRow_AVX2(src_argb4444, row, twidth); - ARGB4444ToARGBRow_AVX2(src_argb4444 + src_stride_argb4444, - row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_argb4444 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif -#endif - -#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON) -void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB1555ToARGBRow_NEON(src_argb1555, row, twidth); - ARGBToYMatrixRow_NEON(row, dst_y, twidth, c); - src_argb1555 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON) -void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB4444ToARGBRow_NEON(src_argb4444, row, twidth); - ARGBToYMatrixRow_NEON(row, dst_y, twidth, c); - src_argb4444 += twidth * 2; - dst_y += twidth; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON) -void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555, - int src_stride_argb1555, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; - ARGB1555ToARGBRow_NEON(src_argb1555, row, twidth); - ARGB1555ToARGBRow_NEON(src_argb1555 + src_stride_argb1555, - row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_argb1555 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - -#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON) -void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444, - int src_stride_argb4444, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]); - while (width > 0) { - int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; - ARGB4444ToARGBRow_NEON(src_argb4444, row, twidth); - ARGB4444ToARGBRow_NEON(src_argb4444 + src_stride_argb4444, - row + MAXTWIDTH * 4, twidth); - ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c); - src_argb4444 += twidth * 2; - dst_u += twidth / 2; - dst_v += twidth / 2; - width -= twidth; - } -} -#endif - #ifdef __cplusplus } // extern "C" } // namespace libyuv diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 10ecf5910..767dc8605 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/convert_from_argb.h" // For ArgbConstants #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" // For ArgbConstants #ifdef __cplusplus namespace libyuv { @@ -21,10 +21,6 @@ extern "C" { (defined(__x86_64__) || defined(__i386__)) && \ !defined(LIBYUV_ENABLE_ROWWIN) -// Note: for avx and avx512 declare clobber as xmm registers due to -// clang for windows needing to preserve xmm registers but not saving -// them if declared as ymm or zmm. 
- #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) // Constants for ARGB @@ -33,6 +29,7 @@ extern "C" { static const uvec8 kARGBToYJ = {29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u, 29u, 150u, 77u, 0u}; + #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) @@ -51,10 +48,8 @@ static const uvec16 kSub128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, #ifdef HAS_RGB24TOARGBROW_SSSE3 // Shuffle table for converting RGB24 to ARGB. -static const uvec8 kShuffleMaskRGB24ToARGB[2] = { - {0u, 1u, 2u, 128u, 3u, 4u, 5u, 128u, 6u, 7u, 8u, 128u, 9u, 10u, 11u, 128u}, - {4u, 5u, 6u, 128u, 7u, 8u, 9u, 128u, 10u, 11u, 12u, 128u, 13u, 14u, 15u, - 128u}}; +static const uvec8 kShuffleMaskRGB24ToARGB = { + 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; // Shuffle table for converting RAW to ARGB. static const uvec8 kShuffleMaskRAWToARGB = { @@ -118,76 +113,34 @@ static const lvec8 kShuffleNV21 = { }; #endif // HAS_RGB24TOARGBROW_SSSE3 -#if defined(HAS_J400TOARGBROW_AVX2) || defined(HAS_J400TOARGBROW_AVX512BW) -alignas(64) static const uint8_t kShuffleMaskJ400ToARGB[64] = { - 0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u, 2u, 2u, 2u, 128u, 3u, 3u, - 3u, 128u, 4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u, 6u, 6u, 6u, 128u, - 7u, 7u, 7u, 128u, 8u, 8u, 8u, 128u, 9u, 9u, 9u, 128u, 10u, 10u, - 10u, 128u, 11u, 11u, 11u, 128u, 12u, 12u, 12u, 128u, 13u, 13u, 13u, 128u, - 14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u}; -#endif - -#ifdef HAS_J400TOARGBROW_AVX2 -void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) { +#ifdef HAS_J400TOARGBROW_SSE2 +void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( - "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" - "vpslld $0x18,%%ymm7,%%ymm7 \n" - "vmovdqa (%3),%%ymm5 \n" - "vmovdqa 0x20(%3),%%ymm6 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0x18,%%xmm5 \n" LABELALIGN "1: \n" - "vbroadcasti128 
(%0),%%ymm0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm1 \n" - "vpshufb %%ymm6,%%ymm0,%%ymm2 \n" - "vpor %%ymm7,%%ymm1,%%ymm1 \n" - "vpor %%ymm7,%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm1,(%1) \n" - "vmovdqu %%ymm2,0x20(%1) \n" - "lea 0x10(%0),%0 \n" - "lea 0x40(%1),%1 \n" - "sub $0x10,%2 \n" + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklwd %%xmm0,%%xmm0 \n" + "punpckhwd %%xmm1,%%xmm1 \n" + "por %%xmm5,%%xmm0 \n" + "por %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" "jg 1b \n" - "vzeroupper \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(kShuffleMaskJ400ToARGB) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"); + : "+r"(src_y), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm5"); } -#endif // HAS_J400TOARGBROW_AVX2 - -#ifdef HAS_J400TOARGBROW_AVX512BW -void J400ToARGBRow_AVX512BW(const uint8_t* src_y, - uint8_t* dst_argb, - int width) { - asm volatile( - "vpternlogd $0xff,%%zmm7,%%zmm7,%%zmm7 \n" // 0xffffffff - "vpslld $0x18,%%zmm7,%%zmm7 \n" // 0xff000000 - "vmovdqa64 %3,%%zmm5 \n" - - LABELALIGN - "1: \n" - "vbroadcasti32x4 (%0),%%zmm0 \n" - "vbroadcasti32x4 0x10(%0),%%zmm1 \n" - "vpshufb %%zmm5,%%zmm0,%%zmm0 \n" - "vpshufb %%zmm5,%%zmm1,%%zmm1 \n" - "vpord %%zmm7,%%zmm0,%%zmm0 \n" - "vpord %%zmm7,%%zmm1,%%zmm1 \n" - "vmovdqu64 %%zmm0,(%1) \n" - "vmovdqu64 %%zmm1,0x40(%1) \n" - "lea 0x20(%0),%0 \n" - "lea 0x80(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskJ400ToARGB) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm5", "xmm7"); -} -#endif // HAS_J400TOARGBROW_AVX512BW +#endif // HAS_J400TOARGBROW_SSE2 #ifdef HAS_RGB24TOARGBROW_SSSE3 void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, @@ -223,62 +176,13 @@ void RGB24ToARGBRow_SSSE3(const uint8_t* 
src_rgb24, "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRGB24ToARGB[0]) // %3 + : "+r"(src_rgb24), // %0 + "+r"(dst_argb), // %1 + "+r"(width) // %2 + : "m"(kShuffleMaskRGB24ToARGB) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -#ifdef HAS_RGB24TOARGBROW_AVX2 -void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - // Reference to prevent discarding of kShuffleMaskRGB24ToARGB[1] which is - // accessed via offset in assembly. - const uvec8* dummy = &kShuffleMaskRGB24ToARGB[1]; - (void)dummy; - asm volatile( - "vpcmpeqb %%ymm6,%%ymm6,%%ymm6 \n" // 0xff000000 - "vpslld $0x18,%%ymm6,%%ymm6 \n" - "vbroadcasti128 %3,%%ymm4 \n" - "vbroadcasti128 16+%3,%%ymm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu (%0),%%xmm0 \n" // first 12 - "vinserti128 $1,12(%0),%%ymm0,%%ymm0 \n" // second 12 - "vmovdqu 24(%0),%%xmm1 \n" // third 12 - "vinserti128 $1,36(%0),%%ymm1,%%ymm1 \n" // forth 12 - "vmovdqu 48(%0),%%xmm2 \n" // fifth 12 - "vinserti128 $1,60(%0),%%ymm2,%%ymm2 \n" // sixth 12 - "vmovdqu 68(%0),%%xmm3 \n" // seventh 12 - "vinserti128 $1,80(%0),%%ymm3,%%ymm3 \n" // eighth 12 - "lea 96(%0),%0 \n" - "vpshufb %%ymm4,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm4,%%ymm1,%%ymm1 \n" - "vpshufb %%ymm4,%%ymm2,%%ymm2 \n" - "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" - "vpor %%ymm6,%%ymm0,%%ymm0 \n" - "vpor %%ymm6,%%ymm1,%%ymm1 \n" - "vpor %%ymm6,%%ymm2,%%ymm2 \n" - "vpor %%ymm6,%%ymm3,%%ymm3 \n" - "vmovdqu %%ymm0,(%1) \n" - "vmovdqu %%ymm1,0x20(%1) \n" - "vmovdqu %%ymm2,0x40(%1) \n" - "vmovdqu %%ymm3,0x60(%1) \n" - "lea 0x80(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleMaskRGB24ToARGB[0]) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); -} -#endif // HAS_RGB24TOARGBROW_AVX2 - void RAWToARGBRow_SSSE3(const uint8_t* 
src_raw, uint8_t* dst_argb, int width) { asm volatile( "pcmpeqb %%xmm6,%%xmm6 \n" // 0xff000000 @@ -362,10 +266,7 @@ void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width) { static const uint32_t kPermdRAWToARGB_AVX512BW[16] = { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; -void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, - uint8_t* dst_argb, - const uint32_t* shuffler, - int width) { +void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const uint32_t* shuffler, int width) { asm volatile( "vpternlogd $0xff,%%zmm6,%%zmm6,%%zmm6 \n" // 0xffffffff "vpslld $0x18,%%zmm6,%%zmm6 \n" // 0xff000000 @@ -406,25 +307,19 @@ void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, "+r"(width) // %2 : "m"(kPermdRAWToARGB_AVX512BW), // %3 "m"(*shuffler) // %4 - : "memory", "cc", "rax", "k1", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", - "xmm5", "xmm6"); + : "memory", "cc", "rax", "k1", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6"); } -void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, - uint8_t* dst_argb, - int width) { - RGBToARGBRow_AVX512BW(src_raw, dst_argb, - (const uint32_t*)&kShuffleMaskRAWToARGB, width); +void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width) { + RGBToARGBRow_AVX512BW(src_raw, dst_argb, (const uint32_t*)&kShuffleMaskRAWToARGB, width); } -void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - RGBToARGBRow_AVX512BW(src_rgb24, dst_argb, - (const uint32_t*)&kShuffleMaskRGB24ToARGB[0], width); +void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { + RGBToARGBRow_AVX512BW(src_rgb24, dst_argb, (const uint32_t*)&kShuffleMaskRGB24ToARGB, width); } #endif + // Same code as RAWToARGB with different shuffler and A in low bits void RAWToRGBARow_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgba, int width) { asm volatile( @@ -496,47 +391,46 @@ void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, : "memory", "cc", "xmm0", "xmm1", 
"xmm2", "xmm3", "xmm4", "xmm5"); } -#ifdef HAS_RGB565TOARGBROW_AVX2 -void RGB565ToARGBRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { +void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "mov $0x1080108,%%eax \n" - "vmovd %%eax,%%xmm5 \n" - "vpbroadcastd %%xmm5,%%ymm5 \n" + "movd %%eax,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" "mov $0x20802080,%%eax \n" - "vmovd %%eax,%%xmm6 \n" - "vpbroadcastd %%xmm6,%%ymm6 \n" - "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n" - "vpsllw $0xb,%%ymm3,%%ymm3 \n" - "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" - "vpsllw $10,%%ymm4,%%ymm4 \n" - "vpsrlw $5,%%ymm4,%%ymm4 \n" - "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" - "vpsllw $0x8,%%ymm7,%%ymm7 \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psllw $0xb,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $10,%%xmm4 \n" + "psrlw $5,%%xmm4 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "psllw $0x8,%%xmm7 \n" "sub %0,%1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vpand %%ymm3,%%ymm0,%%ymm1 \n" - "vpsllw $0xb,%%ymm0,%%ymm2 \n" - "vpmulhuw %%ymm5,%%ymm1,%%ymm1 \n" - "vpmulhuw %%ymm5,%%ymm2,%%ymm2 \n" - "vpsllw $0x8,%%ymm1,%%ymm1 \n" - "vpor %%ymm2,%%ymm1,%%ymm1 \n" - "vpand %%ymm4,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" - "vpor %%ymm7,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm0,%%ymm1,%%ymm2 \n" - "vpunpckhbw %%ymm0,%%ymm1,%%ymm1 \n" - "vperm2i128 $0x20,%%ymm1,%%ymm2,%%ymm0 \n" - "vperm2i128 $0x31,%%ymm1,%%ymm2,%%ymm1 \n" - "vmovdqu %%ymm0,(%1,%0,2) \n" - "vmovdqu %%ymm1,0x20(%1,%0,2) \n" - "lea 0x20(%0),%0 \n" - "sub $0x10,%2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm3,%%xmm1 \n" + "psllw $0xb,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "psllw $0x8,%%xmm1 \n" + "por %%xmm2,%%xmm1 \n" + "pand %%xmm4,%%xmm0 \n" + "pmulhuw %%xmm6,%%xmm0 \n" + "por %%xmm7,%%xmm0 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + 
"punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,0x00(%1,%0,2) \n" + "movdqu %%xmm2,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" "jg 1b \n" - "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 @@ -544,50 +438,50 @@ void RGB565ToARGBRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } -#endif -#ifdef HAS_ARGB1555TOARGBROW_AVX2 -void ARGB1555ToARGBRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { +void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "mov $0x1080108,%%eax \n" - "vmovd %%eax,%%xmm5 \n" - "vpbroadcastd %%xmm5,%%ymm5 \n" + "movd %%eax,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" "mov $0x42004200,%%eax \n" - "vmovd %%eax,%%xmm6 \n" - "vpbroadcastd %%xmm6,%%ymm6 \n" - "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n" - "vpsllw $0xb,%%ymm3,%%ymm3 \n" - "vpsrlw $0x6,%%ymm3,%%ymm4 \n" - "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" - "vpsllw $0x8,%%ymm7,%%ymm7 \n" + "movd %%eax,%%xmm6 \n" + "pshufd $0x0,%%xmm6,%%xmm6 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psllw $0xb,%%xmm3 \n" + "movdqa %%xmm3,%%xmm4 \n" + "psrlw $0x6,%%xmm4 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "psllw $0x8,%%xmm7 \n" "sub %0,%1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vpsllw $0x1,%%ymm0,%%ymm1 \n" - "vpsllw $0xb,%%ymm0,%%ymm2 \n" - "vpand %%ymm3,%%ymm1,%%ymm1 \n" - "vpmulhuw %%ymm5,%%ymm2,%%ymm2 \n" - "vpmulhuw %%ymm5,%%ymm1,%%ymm1 \n" - "vpsllw $0x8,%%ymm1,%%ymm1 \n" - "vpor %%ymm2,%%ymm1,%%ymm1 \n" - "vpsraw $0x8,%%ymm0,%%ymm2 \n" - "vpand %%ymm4,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" - "vpand %%ymm7,%%ymm2,%%ymm2 \n" - "vpor %%ymm2,%%ymm0,%%ymm0 \n" - "vpunpcklbw %%ymm0,%%ymm1,%%ymm2 \n" - "vpunpckhbw %%ymm0,%%ymm1,%%ymm1 \n" - "vperm2i128 $0x20,%%ymm1,%%ymm2,%%ymm0 \n" - "vperm2i128 $0x31,%%ymm1,%%ymm2,%%ymm1 \n" - "vmovdqu %%ymm0,(%1,%0,2) \n" - "vmovdqu %%ymm1,0x20(%1,%0,2) \n" - "lea 0x20(%0),%0 \n" - "sub 
$0x10,%2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "psllw $0x1,%%xmm1 \n" + "psllw $0xb,%%xmm2 \n" + "pand %%xmm3,%%xmm1 \n" + "pmulhuw %%xmm5,%%xmm2 \n" + "pmulhuw %%xmm5,%%xmm1 \n" + "psllw $0x8,%%xmm1 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand %%xmm4,%%xmm0 \n" + "psraw $0x8,%%xmm2 \n" + "pmulhuw %%xmm6,%%xmm0 \n" + "pand %%xmm7,%%xmm2 \n" + "por %%xmm2,%%xmm0 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,0x00(%1,%0,2) \n" + "movdqu %%xmm2,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" "jg 1b \n" - "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 @@ -595,75 +489,74 @@ void ARGB1555ToARGBRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } -#endif -#ifdef HAS_ARGB4444TOARGBROW_AVX2 -void ARGB4444ToARGBRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { +void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( - "mov $0x0f0f0f0f,%%eax \n" - "vmovd %%eax,%%xmm4 \n" - "vpbroadcastd %%xmm4,%%ymm4 \n" - "vpslld $0x4,%%ymm4,%%ymm5 \n" + "mov $0xf0f0f0f,%%eax \n" + "movd %%eax,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "pslld $0x4,%%xmm5 \n" "sub %0,%1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vpand %%ymm5,%%ymm0,%%ymm2 \n" - "vpand %%ymm4,%%ymm0,%%ymm0 \n" - "vpsllw $0x4,%%ymm0,%%ymm1 \n" - "vpsrlw $0x4,%%ymm2,%%ymm3 \n" - "vpor %%ymm1,%%ymm0,%%ymm0 \n" - "vpor %%ymm3,%%ymm2,%%ymm2 \n" - "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" - "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" - "vperm2i128 $0x20,%%ymm1,%%ymm0,%%ymm2 \n" - "vperm2i128 $0x31,%%ymm1,%%ymm0,%%ymm1 \n" - "vmovdqu %%ymm2,(%1,%0,2) \n" - "vmovdqu %%ymm1,0x20(%1,%0,2) \n" - "lea 0x20(%0),%0 \n" - "sub $0x10,%2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pand 
%%xmm4,%%xmm0 \n" + "pand %%xmm5,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm2,%%xmm3 \n" + "psllw $0x4,%%xmm1 \n" + "psrlw $0x4,%%xmm3 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm3,%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,0x00(%1,%0,2) \n" + "movdqu %%xmm1,0x10(%1,%0,2) \n" + "lea 0x10(%0),%0 \n" + "sub $0x8,%2 \n" "jg 1b \n" - "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } -#endif void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { - asm volatile("movdqa %3,%%xmm6 \n" + asm volatile("movdqa %3,%%xmm6 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu 0x10(%0),%%xmm1 \n" - "movdqu 0x20(%0),%%xmm2 \n" - "movdqu 0x30(%0),%%xmm3 \n" - "lea 0x40(%0),%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0,(%1) \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1,0x10(%1) \n" - "movdqu %%xmm2,0x20(%1) \n" - "lea 0x30(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "pshufb %%xmm6,%%xmm0 \n" + "pshufb %%xmm6,%%xmm1 \n" + "pshufb %%xmm6,%%xmm2 \n" + "pshufb %%xmm6,%%xmm3 \n" + "movdqa %%xmm1,%%xmm4 \n" + "psrldq $0x4,%%xmm1 \n" + "pslldq $0xc,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "por %%xmm4,%%xmm0 \n" + "pslldq $0x8,%%xmm5 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "psrldq $0x8,%%xmm2 \n" + "pslldq $0x4,%%xmm3 \n" + "por %%xmm3,%%xmm2 \n" + "movdqu %%xmm1,0x10(%1) \n" + "movdqu %%xmm2,0x20(%1) 
\n" + "lea 0x30(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 @@ -673,35 +566,35 @@ void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { } void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { - asm volatile("movdqa %3,%%xmm6 \n" + asm volatile("movdqa %3,%%xmm6 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu 0x10(%0),%%xmm1 \n" - "movdqu 0x20(%0),%%xmm2 \n" - "movdqu 0x30(%0),%%xmm3 \n" - "lea 0x40(%0),%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0,(%1) \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1,0x10(%1) \n" - "movdqu %%xmm2,0x20(%1) \n" - "lea 0x30(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "movdqu 0x20(%0),%%xmm2 \n" + "movdqu 0x30(%0),%%xmm3 \n" + "lea 0x40(%0),%0 \n" + "pshufb %%xmm6,%%xmm0 \n" + "pshufb %%xmm6,%%xmm1 \n" + "pshufb %%xmm6,%%xmm2 \n" + "pshufb %%xmm6,%%xmm3 \n" + "movdqa %%xmm1,%%xmm4 \n" + "psrldq $0x4,%%xmm1 \n" + "pslldq $0xc,%%xmm4 \n" + "movdqa %%xmm2,%%xmm5 \n" + "por %%xmm4,%%xmm0 \n" + "pslldq $0x8,%%xmm5 \n" + "movdqu %%xmm0,(%1) \n" + "por %%xmm5,%%xmm1 \n" + "psrldq $0x8,%%xmm2 \n" + "pslldq $0x4,%%xmm3 \n" + "por %%xmm3,%%xmm2 \n" + "movdqu %%xmm1,0x10(%1) \n" + "movdqu %%xmm2,0x20(%1) \n" + "lea 0x30(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 @@ -853,6 +746,90 @@ void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { } #endif +void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm3,%%xmm3 \n" + "psrld $0x1b,%%xmm3 \n" + 
"pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1a,%%xmm4 \n" + "pslld $0x5,%%xmm4 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0xb,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pslld $0x8,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x5,%%xmm2 \n" + "psrad $0x10,%%xmm0 \n" + "pand %%xmm3,%%xmm1 \n" + "pand %%xmm4,%%xmm2 \n" + "pand %%xmm5,%%xmm0 \n" + "por %%xmm2,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); +} + +void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, + uint8_t* dst, + uint32_t dither4, + int width) { + asm volatile( + "movd %3,%%xmm6 \n" + "punpcklbw %%xmm6,%%xmm6 \n" + "movdqa %%xmm6,%%xmm7 \n" + "punpcklwd %%xmm6,%%xmm6 \n" + "punpckhwd %%xmm7,%%xmm7 \n" + "pcmpeqb %%xmm3,%%xmm3 \n" + "psrld $0x1b,%%xmm3 \n" + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1a,%%xmm4 \n" + "pslld $0x5,%%xmm4 \n" + "pcmpeqb %%xmm5,%%xmm5 \n" + "pslld $0xb,%%xmm5 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "paddusb %%xmm6,%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "pslld $0x8,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x5,%%xmm2 \n" + "psrad $0x10,%%xmm0 \n" + "pand %%xmm3,%%xmm1 \n" + "pand %%xmm4,%%xmm2 \n" + "pand %%xmm5,%%xmm0 \n" + "por %%xmm2,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "m"(dither4) // %3 + : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", + "xmm7"); +} + #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, @@ -899,6 +876,75 @@ void 
ARGBToRGB565DitherRow_AVX2(const uint8_t* src, } #endif // HAS_ARGBTORGB565DITHERROW_AVX2 +void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psrld $0x1b,%%xmm4 \n" + "movdqa %%xmm4,%%xmm5 \n" + "pslld $0x5,%%xmm5 \n" + "movdqa %%xmm4,%%xmm6 \n" + "pslld $0xa,%%xmm6 \n" + "pcmpeqb %%xmm7,%%xmm7 \n" + "pslld $0xf,%%xmm7 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm0,%%xmm2 \n" + "movdqa %%xmm0,%%xmm3 \n" + "psrad $0x10,%%xmm0 \n" + "psrld $0x3,%%xmm1 \n" + "psrld $0x6,%%xmm2 \n" + "psrld $0x9,%%xmm3 \n" + "pand %%xmm7,%%xmm0 \n" + "pand %%xmm4,%%xmm1 \n" + "pand %%xmm5,%%xmm2 \n" + "pand %%xmm6,%%xmm3 \n" + "por %%xmm1,%%xmm0 \n" + "por %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm0 \n" + "packssdw %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); +} + +void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { + asm volatile( + "pcmpeqb %%xmm4,%%xmm4 \n" + "psllw $0xc,%%xmm4 \n" + "movdqa %%xmm4,%%xmm3 \n" + "psrlw $0x8,%%xmm3 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "pand %%xmm3,%%xmm0 \n" + "pand %%xmm4,%%xmm1 \n" + "psrlq $0x4,%%xmm0 \n" + "psrlq $0x8,%%xmm1 \n" + "por %%xmm1,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + ::"memory", + "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); +} #endif // HAS_RGB24TOARGBROW_SSSE3 /* @@ -1166,21 +1212,21 @@ void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64, void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { - asm volatile("movdqa %3,%%xmm2 \n" + asm 
volatile("movdqa %3,%%xmm2 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu 0x10(%0),%%xmm1 \n" - "psrlw $8,%%xmm0 \n" - "psrlw $8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "pshufb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0,(%1) \n" - "lea 0x20(%0),%0 \n" - "lea 0x10(%1),%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "psrlw $8,%%xmm0 \n" + "psrlw $8,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "pshufb %%xmm2,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x20(%0),%0 \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" : "+r"(src_ab64), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -1271,21 +1317,21 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64, void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, uint8_t* dst_argb, int width) { - asm volatile("vbroadcasti128 %3,%%ymm2 \n" LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vmovdqu 0x20(%0),%%ymm1 \n" - "vpsrlw $8,%%ymm0,%%ymm0 \n" - "vpsrlw $8,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm2,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%1) \n" - "lea 0x40(%0),%0 \n" - "lea 0x20(%1),%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" + asm volatile("vbroadcasti128 %3,%%ymm2 \n" LABELALIGN + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "vpsrlw $8,%%ymm0,%%ymm0 \n" + "vpsrlw $8,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "vpshufb %%ymm2,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x40(%0),%0 \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_ab64), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -1465,7 +1511,9 @@ void ARGBToYMatrixRow_SSSE3(const uint8_t* src_argb, "movdqa %%xmm4,%%xmm6 \n" "pmaddubsw %%xmm5,%%xmm6 \n" "phaddw %%xmm6,%%xmm6 \n" - "psubw %%xmm6,%%xmm7 \n" LABELALIGN "" RGBTOY(xmm7) + "psubw %%xmm6,%%xmm7 \n" + LABELALIGN "" + RGBTOY(xmm7) : "+r"(src_argb), // %0 
"+r"(dst_y), // %1 "+r"(width) // %2 @@ -1489,8 +1537,10 @@ void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb, "vpmaddubsw %%ymm5,%%ymm4,%%ymm6 \n" "vphaddw %%ymm6,%%ymm6,%%ymm6 \n" "vpsubw %%ymm6,%%ymm7,%%ymm7 \n" - "vmovdqa %4,%%ymm6 \n" LABELALIGN - "" RGBTOY_AVX2(ymm7) "vzeroupper \n" + "vmovdqa %4,%%ymm6 \n" + LABELALIGN "" + RGBTOY_AVX2(ymm7) + "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -1501,9 +1551,8 @@ void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb, } #endif -#if defined(HAS_ARGBTOYROW_AVX512BW) || \ - defined(HAS_ARGBTOUV444ROW_AVX512BW) || defined(HAS_ARGBTOUVROW_AVX512BW) -static const uint32_t kPermdARGBToY_AVX512BW[16] = {0, 4, 8, 12, 1, 5, 9, 13, +#if defined(HAS_ARGBTOYROW_AVX512BW) || defined(HAS_ARGBTOUV444ROW_AVX512BW) || defined(HAS_ARGBTOUVROW_AVX512BW) +static const uint32_t kPermdARGBToY_AVX512BW[16] = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; #endif @@ -1521,14 +1570,15 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb, "vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" "vpsllw $15,%%zmm16,%%zmm5 \n" "vpacksswb %%zmm5,%%zmm5,%%zmm5 \n" - "vpsrlw $15,%%zmm16,%%zmm16 \n" // zmm16 = 1 + "vpsrlw $15,%%zmm16,%%zmm16 \n" // zmm16 = 1 "vbroadcasti64x4 0(%3),%%zmm4 \n" "vbroadcasti64x4 0x60(%3),%%zmm7 \n" "vpmaddubsw %%zmm5,%%zmm4,%%zmm6 \n" "vpmaddwd %%zmm16,%%zmm6,%%zmm6 \n" "vpackssdw %%zmm6,%%zmm6,%%zmm6 \n" "vpsubw %%zmm6,%%zmm7,%%zmm7 \n" - "vmovups %4,%%zmm6 \n" LABELALIGN + "vmovups %4,%%zmm6 \n" + LABELALIGN "1: \n" "vmovups (%0),%%zmm0 \n" "vmovups 0x40(%0),%%zmm1 \n" @@ -1560,13 +1610,13 @@ void ARGBToYMatrixRow_AVX512BW(const uint8_t* src_argb, "sub $0x40,%2 \n" "jg 1b \n" "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "r"(c), // %3 - "m"(kPermdARGBToY_AVX512BW) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", - "xmm7", "xmm16"); + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(c), 
// %3 + "m"(kPermdARGBToY_AVX512BW) // %4 + : "memory", "cc", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", + "zmm7", "zmm16"); } #endif @@ -1707,8 +1757,8 @@ void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb, #endif : "r"(c), // %4 "m"(kPermdARGBToY_AVX) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", - "xmm7"); + : "memory", "cc", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", + "ymm7"); } #endif // HAS_ARGBTOUV444ROW_AVX2 @@ -1722,8 +1772,8 @@ void ARGBToUV444MatrixRow_AVX512BW(const uint8_t* src_argb, asm volatile( "vbroadcasti64x4 0x20(%4),%%zmm3 \n" // kRGBToU "vbroadcasti64x4 0x40(%4),%%zmm4 \n" // kRGBToV - "vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" // -1 - "vpsllw $15,%%zmm16,%%zmm5 \n" // 0x8000 + "vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" // -1 + "vpsllw $15,%%zmm16,%%zmm5 \n" // 0x8000 "vmovups %5,%%zmm7 \n" "sub %1,%2 \n" @@ -1787,8 +1837,8 @@ void ARGBToUV444MatrixRow_AVX512BW(const uint8_t* src_argb, #endif : "r"(c), // %4 "m"(kPermdARGBToY_AVX512BW) // %5 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", - "xmm7", "xmm16"); + : "memory", "cc", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", + "zmm7", "zmm16"); } #endif // HAS_ARGBTOUV444ROW_AVX512BW @@ -1883,8 +1933,8 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { asm volatile( - "vbroadcasti128 0x20(%5),%%ymm4 \n" // RGBToU - "vbroadcasti128 0x40(%5),%%ymm5 \n" // RGBToV + "vbroadcasti128 0x20(%5),%%ymm4 \n" // RGBToU + "vbroadcasti128 0x40(%5),%%ymm5 \n" // RGBToV "vpcmpeqb %%ymm6,%%ymm6,%%ymm6 \n" // 0x0101 "vpabsb %%ymm6,%%ymm6 \n" "vmovdqa %6,%%ymm7 \n" // kShuffleAARRGGBB @@ -1964,6 +2014,7 @@ void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, } #endif // HAS_ARGBTOUV444ROW_SSSE3 + #ifdef HAS_ARGBTOYROW_AVX2 void RGBAToYRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) { ARGBToYMatrixRow_AVX2(src_rgba, dst_y, width, 
&kRgbaI601Constants); @@ -1976,6 +2027,7 @@ void BGRAToYRow_AVX2(const uint8_t* src_bgra, uint8_t* dst_y, int width) { } #endif + #ifdef HAS_ARGBTOYROW_AVX512BW void ARGBToYRow_AVX512BW(const uint8_t* src_argb, uint8_t* dst_y, int width) { ARGBToYMatrixRow_AVX512BW(src_argb, dst_y, width, &kArgbI601Constants); @@ -2183,8 +2235,8 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb, "vbroadcasti64x4 0x20(%5),%%zmm4 \n" // RGBToU "vbroadcasti64x4 0x40(%5),%%zmm5 \n" // RGBToV "vpternlogd $0xff,%%zmm16,%%zmm16,%%zmm16 \n" - "vpabsb %%zmm16,%%zmm6 \n" // 0x0101 - "vpsllw $15,%%zmm16,%%zmm17 \n" // 0x8000 + "vpabsb %%zmm16,%%zmm6 \n" // 0x0101 + "vpsllw $15,%%zmm16,%%zmm17 \n" // 0x8000 "vbroadcasti64x4 %6,%%zmm7 \n" // kShuffleAARRGGBB "vmovups %7,%%zmm18 \n" // kPermdARGBToY_AVX512BW "vmovups %8,%%zmm19 \n" // kPermdARGBToUV_AVX512BW @@ -2218,8 +2270,7 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb, "vpmaddubsw %%zmm5,%%zmm0,%%zmm0 \n" // 16 V "vpmaddwd %%zmm16,%%zmm1,%%zmm1 \n" "vpmaddwd %%zmm16,%%zmm0,%%zmm0 \n" - "vpackssdw %%zmm0,%%zmm1,%%zmm0 \n" // mutates (U in lower, V - // in upper) + "vpackssdw %%zmm0,%%zmm1,%%zmm0 \n" // mutates (U in lower, V in upper) "vpaddw %%zmm17,%%zmm0,%%zmm0 \n" "vpsrlw $0x8,%%zmm0,%%zmm0 \n" "vpackuswb %%zmm0,%%zmm0,%%zmm0 \n" // mutates @@ -2247,8 +2298,8 @@ void ARGBToUVMatrixRow_AVX512BW(const uint8_t* src_argb, "m"(kShuffleAARRGGBB), // %6 "m"(kPermdARGBToY_AVX512BW), // %7 "m"(kPermdARGBToUV_AVX512BW) // %8 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", - "xmm7", "xmm16", "xmm17", "xmm18", "xmm19"); + : "memory", "cc", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", + "zmm7", "zmm16", "zmm17", "zmm18", "zmm19"); } void ARGBToUVRow_AVX512BW(const uint8_t* src_argb, @@ -2669,12 +2720,12 @@ void OMITFP I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + 
yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA444 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READYUVA444 YUVTORGB(yuvconstants) STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" + "subl $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] [v_buf] "+r"(v_buf), // %[v_buf] @@ -2995,12 +3046,12 @@ void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA210 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READYUVA210 YUVTORGB(yuvconstants) STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" + "subl $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] [v_buf] "+r"(v_buf), // %[v_buf] @@ -3027,12 +3078,12 @@ void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA410 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READYUVA410 YUVTORGB(yuvconstants) STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" + "subl $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] [v_buf] "+r"(v_buf), // %[v_buf] @@ -3093,12 +3144,12 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA422 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READYUVA422 YUVTORGB(yuvconstants) STOREARGB - "subl $0x8,%[width] \n" - "jg 1b \n" + "subl $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] [v_buf] "+r"(v_buf), // 
%[v_buf] @@ -3121,12 +3172,12 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN "1: \n" READNV12 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READNV12 YUVTORGB(yuvconstants) STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" + "sub $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [uv_buf] "+r"(uv_buf), // %[uv_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -3142,12 +3193,12 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN "1: \n" READNV21 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READNV21 YUVTORGB(yuvconstants) STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" + "sub $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [vu_buf] "+r"(vu_buf), // %[vu_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -3165,7 +3216,7 @@ void OMITFP YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, asm volatile( "movdqa %[kShuffleYUY2Y],%%xmm6 \n" "movdqa %[kShuffleYUY2UV],%%xmm7 \n" YUVTORGB_SETUP( - yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUY2 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" @@ -3186,7 +3237,7 @@ void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, asm volatile( "movdqa %[kShuffleUYVYY],%%xmm6 \n" "movdqa %[kShuffleUYVYUV],%%xmm7 \n" YUVTORGB_SETUP( - yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READUYVY YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" @@ -3206,12 +3257,12 @@ void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) 
"pcmpeqb %%xmm5,%%xmm5 \n" + yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN "1: \n" READP210 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READP210 YUVTORGB(yuvconstants) STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" + "sub $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [uv_buf] "+r"(uv_buf), // %[u_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -3227,12 +3278,12 @@ void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP( - yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" + yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" - LABELALIGN "1: \n" READP410 YUVTORGB(yuvconstants) + LABELALIGN "1: \n" READP410 YUVTORGB(yuvconstants) STOREARGB - "sub $0x8,%[width] \n" - "jg 1b \n" + "sub $0x8,%[width] \n" + "jg 1b \n" : [y_buf] "+r"(y_buf), // %[y_buf] [uv_buf] "+r"(uv_buf), // %[u_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -4055,13 +4106,13 @@ void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA210_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READYUVA210_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "subl $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "subl $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] @@ -4090,13 +4141,13 @@ void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA410_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READYUVA410_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "subl $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "subl $0x10,%[width] \n" + "jg 1b \n" + 
"vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] @@ -4165,13 +4216,13 @@ void OMITFP I444AlphaToARGBRow_AVX2(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA444_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READYUVA444_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "subl $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "subl $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] [v_buf] "+r"(v_buf), // %[v_buf] @@ -4199,13 +4250,13 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "sub %[u_buf],%[v_buf] \n" + yuvconstants) "sub %[u_buf],%[v_buf] \n" - LABELALIGN "1: \n" READYUVA422_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READYUVA422_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "subl $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "subl $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [u_buf] "+r"(u_buf), // %[u_buf] [v_buf] "+r"(v_buf), // %[v_buf] @@ -4275,13 +4326,13 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN "1: \n" READNV12_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READNV12_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [uv_buf] "+r"(uv_buf), // %[uv_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -4301,13 +4352,13 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf, 
const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN "1: \n" READNV21_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READNV21_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [vu_buf] "+r"(vu_buf), // %[vu_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -4329,7 +4380,7 @@ void OMITFP YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf, asm volatile( "vbroadcasti128 %[kShuffleYUY2Y],%%ymm6 \n" "vbroadcasti128 %[kShuffleYUY2UV],%%ymm7 \n" YUVTORGB_SETUP_AVX2( - yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUY2_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -4356,7 +4407,7 @@ void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, asm volatile( "vbroadcasti128 %[kShuffleUYVYY],%%ymm6 \n" "vbroadcasti128 %[kShuffleUYVYUV],%%ymm7 \n" YUVTORGB_SETUP_AVX2( - yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READUYVY_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 @@ -4382,13 +4433,13 @@ void OMITFP P210ToARGBRow_AVX2(const uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN "1: \n" READP210_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READP210_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [uv_buf] "+r"(uv_buf), // %[uv_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -4408,13 +4459,13 @@ void OMITFP P410ToARGBRow_AVX2(const 
uint16_t* y_buf, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP_AVX2( - yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" + yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" - LABELALIGN "1: \n" READP410_AVX2 YUVTORGB_AVX2( + LABELALIGN "1: \n" READP410_AVX2 YUVTORGB_AVX2( yuvconstants) STOREARGB_AVX2 - "sub $0x10,%[width] \n" - "jg 1b \n" - "vzeroupper \n" + "sub $0x10,%[width] \n" + "jg 1b \n" + "vzeroupper \n" : [y_buf] "+r"(y_buf), // %[y_buf] [uv_buf] "+r"(uv_buf), // %[uv_buf] [dst_argb] "+r"(dst_argb), // %[dst_argb] @@ -4593,16 +4644,16 @@ static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("movdqa %3,%%xmm5 \n" + asm volatile("movdqa %3,%%xmm5 \n" LABELALIGN - "1: \n" - "movdqu -0x10(%0,%2,1),%%xmm0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0,(%1) \n" - "lea 0x10(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu -0x10(%0,%2,1),%%xmm0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 @@ -4611,44 +4662,21 @@ void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { } #endif // HAS_MIRRORROW_SSSE3 -#ifdef HAS_MIRRORROW_AVX512BW -void MirrorRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width) { - ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("vbroadcasti32x4 %3,%%zmm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu8 -0x40(%0,%2,1),%%zmm0 \n" - "vpshufb %%zmm5,%%zmm0,%%zmm0 \n" - "vshufi64x2 $0x1b,%%zmm0,%%zmm0,%%zmm0 \n" - "vmovdqu8 %%zmm0,(%1) \n" - "lea 0x40(%1),%1 \n" - "sub $0x40,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirror) // %3 - : "memory", "cc", "zmm0", "zmm5"); -} -#endif // HAS_MIRRORROW_AVX512BW - #ifdef HAS_MIRRORROW_AVX2 void 
MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("vbroadcasti128 %3,%%ymm5 \n" + asm volatile("vbroadcasti128 %3,%%ymm5 \n" LABELALIGN - "1: \n" - "vmovdqu -0x20(%0,%2,1),%%ymm0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpermq $0x4e,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu -0x20(%0,%2,1),%%ymm0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpermq $0x4e,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 @@ -4657,82 +4685,6 @@ void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { } #endif // HAS_MIRRORROW_AVX2 -#if defined(HAS_MIRRORSPLITUVROW_AVX2) || defined(HAS_MIRRORSPLITUVROW_AVX512BW) -// Shuffle table for reversing the bytes of UV channels. -static const uvec8 kShuffleMirrorSplitUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, - 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; -#endif - -#ifdef HAS_MIRRORSPLITUVROW_AVX512BW -static const uint64_t kMirrorSplitUVPermute[8] = {6, 4, 2, 0, 7, 5, 3, 1}; - -void MirrorSplitUVRow_AVX512BW(const uint8_t* src, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile( - "vbroadcasti32x4 %4,%%zmm1 \n" - "lea -0x40(%0,%3,2),%0 \n" - "sub %1,%2 \n" - "vmovdqu64 %5,%%zmm3 \n" - - LABELALIGN - "1: \n" - "vmovdqu8 (%0),%%zmm0 \n" - "lea -0x40(%0),%0 \n" - "vpshufb %%zmm1,%%zmm0,%%zmm0 \n" - "vpermq %%zmm0,%%zmm3,%%zmm0 \n" - "vextracti64x4 $0x1,%%zmm0,%%ymm2 \n" - "vmovdqu %%ymm0,(%1) \n" - "vmovdqu %%ymm2,0x00(%1,%2,1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(temp_width) // %3 - : "m"(kShuffleMirrorSplitUV), // %4 - "m"(kMirrorSplitUVPermute) // %5 - : "memory", "cc", "zmm0", "zmm1", 
"zmm2", "zmm3"); -} -#endif // HAS_MIRRORSPLITUVROW_AVX512BW - -#ifdef HAS_MIRRORSPLITUVROW_AVX2 -void MirrorSplitUVRow_AVX2(const uint8_t* src, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile( - "vbroadcasti128 %4,%%ymm1 \n" - "lea -0x20(%0,%3,2),%0 \n" - "sub %1,%2 \n" - - LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "lea -0x20(%0),%0 \n" - "vpshufb %%ymm1,%%ymm0,%%ymm0 \n" - "vpermq $0x72,%%ymm0,%%ymm0 \n" - "vextracti128 $0x1,%%ymm0,%%xmm2 \n" - "vmovdqu %%xmm0,(%1) \n" - "vmovdqu %%xmm2,0x00(%1,%2,1) \n" - "lea 0x10(%1),%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(temp_width) // %3 - : "m"(kShuffleMirrorSplitUV) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2"); -} -#endif // HAS_MIRRORSPLITUVROW_AVX2 - #ifdef HAS_MIRRORUVROW_SSSE3 // Shuffle table for reversing the UV. static const uvec8 kShuffleMirrorUV = {14u, 15u, 12u, 13u, 10u, 11u, 8u, 9u, @@ -4740,16 +4692,16 @@ static const uvec8 kShuffleMirrorUV = {14u, 15u, 12u, 13u, 10u, 11u, 8u, 9u, void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) { ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("movdqa %3,%%xmm5 \n" + asm volatile("movdqa %3,%%xmm5 \n" LABELALIGN - "1: \n" - "movdqu -0x10(%0,%2,2),%%xmm0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "movdqu %%xmm0,(%1) \n" - "lea 0x10(%1),%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu -0x10(%0,%2,2),%%xmm0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" : "+r"(src_uv), // %0 "+r"(dst_uv), // %1 "+r"(temp_width) // %2 @@ -4761,18 +4713,18 @@ void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) { #ifdef HAS_MIRRORUVROW_AVX2 void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) { ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("vbroadcasti128 %3,%%ymm5 \n" + asm 
volatile("vbroadcasti128 %3,%%ymm5 \n" LABELALIGN - "1: \n" - "vmovdqu -0x20(%0,%2,2),%%ymm0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpermq $0x4e,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu -0x20(%0,%2,2),%%ymm0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpermq $0x4e,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_uv), // %0 "+r"(dst_uv), // %1 "+r"(temp_width) // %2 @@ -4781,6 +4733,39 @@ void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) { } #endif // HAS_MIRRORUVROW_AVX2 +#ifdef HAS_MIRRORSPLITUVROW_SSSE3 +// Shuffle table for reversing the bytes of UV channels. +static const uvec8 kShuffleMirrorSplitUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, + 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; +void MirrorSplitUVRow_SSSE3(const uint8_t* src, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + ptrdiff_t temp_width = (ptrdiff_t)(width); + asm volatile( + "movdqa %4,%%xmm1 \n" + "lea -0x10(%0,%3,2),%0 \n" + "sub %1,%2 \n" + + LABELALIGN + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea -0x10(%0),%0 \n" + "pshufb %%xmm1,%%xmm0 \n" + "movlpd %%xmm0,(%1) \n" + "movhpd %%xmm0,0x00(%1,%2,1) \n" + "lea 0x8(%1),%1 \n" + "sub $8,%3 \n" + "jg 1b \n" + : "+r"(src), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(temp_width) // %3 + : "m"(kShuffleMirrorSplitUV) // %4 + : "memory", "cc", "xmm0", "xmm1"); +} +#endif // HAS_MIRRORSPLITUVROW_SSSE3 + #ifdef HAS_RGB24MIRRORROW_SSSE3 // Shuffle first 5 pixels to last 5 mirrored. first byte zero @@ -4828,73 +4813,21 @@ void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24, } #endif // HAS_RGB24MIRRORROW_SSSE3 -#ifdef HAS_RGB24MIRRORROW_AVX2 -// Shuffle first 10 pixels to last 10 mirrored. 
first byte zero -static const uvec8 kShuffleMirrorRGB0_AVX = { - 128u, 12u, 13u, 14u, 9u, 10u, 11u, 6u, 7u, 8u, 3u, 4u, 5u, 0u, 1u, 2u}; - -// Shuffle last 2 pixels to first 2 mirrored. last byte zero -static const uvec8 kShuffleMirrorRGB1_AVX = { - 13u, 14u, 15u, 10u, 11u, 12u, 7u, 8u, 9u, 4u, 5u, 6u, 1u, 2u, 3u, 128u}; - -void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24, - uint8_t* dst_rgb24, - int width) { - ptrdiff_t temp_width = (ptrdiff_t)(width); - src_rgb24 += width * 3 - 96; - asm volatile( - "vbroadcasti128 %3,%%ymm4 \n" - "vmovdqa %4,%%xmm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu (%0),%%xmm0 \n" // first 10 - "vinserti128 $1,15(%0),%%ymm0,%%ymm0 \n" - "vmovdqu 30(%0),%%xmm1 \n" // next 10 - "vinserti128 $1,45(%0),%%ymm1,%%ymm1 \n" - "vmovdqu 60(%0),%%xmm2 \n" // next 10 - "vinserti128 $1,75(%0),%%ymm2,%%ymm2 \n" - "vmovdqu 80(%0),%%xmm3 \n" // last 2 special - "vpshufb %%ymm4,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm4,%%ymm1,%%ymm1 \n" - "vpshufb %%ymm4,%%ymm2,%%ymm2 \n" - "vpshufb %%xmm5,%%xmm3,%%xmm3 \n" - "lea -0x60(%0),%0 \n" - "vmovdqu %%xmm0,80(%1) \n" - "vextracti128 $1,%%ymm0,65(%1) \n" - "vmovdqu %%xmm1,50(%1) \n" - "vextracti128 $1,%%ymm1,35(%1) \n" - "vmovdqu %%xmm2,20(%1) \n" - "vextracti128 $1,%%ymm2,5(%1) \n" - "vmovq %%xmm3,0(%1) \n" - "lea 0x60(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_rgb24), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirrorRGB0_AVX), // %3 - "m"(kShuffleMirrorRGB1_AVX) // %4 - : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); -} -#endif // HAS_RGB24MIRRORROW_AVX2 - #ifdef HAS_ARGBMIRRORROW_SSE2 void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("lea -0x10(%0,%2,4),%0 \n" + asm volatile("lea -0x10(%0,%2,4),%0 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "pshufd $0x1b,%%xmm0,%%xmm0 \n" - "lea -0x10(%0),%0 \n" - "movdqu %%xmm0,(%1) \n" - "lea 0x10(%1),%1 \n" - "sub 
$0x4,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "pshufd $0x1b,%%xmm0,%%xmm0 \n" + "lea -0x10(%0),%0 \n" + "movdqu %%xmm0,(%1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x4,%2 \n" + "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 @@ -4908,16 +4841,16 @@ void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { ptrdiff_t temp_width = (ptrdiff_t)(width); - asm volatile("vmovdqu %3,%%ymm5 \n" + asm volatile("vmovdqu %3,%%ymm5 \n" LABELALIGN - "1: \n" - "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n" - "vmovdqu %%ymm0,(%1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n" + "vmovdqu %%ymm0,(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 @@ -4964,47 +4897,6 @@ void SplitUVRow_AVX2(const uint8_t* src_uv, } #endif // HAS_SPLITUVROW_AVX2 -#ifdef HAS_SPLITUVROW_AVX512BW -static const uint64_t kSplitUVPermute[8] = {0, 2, 4, 6, 1, 3, 5, 7}; - -void SplitUVRow_AVX512BW(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - asm volatile( - "vpternlogd $0xff,%%zmm5,%%zmm5,%%zmm5 \n" - "vpsrlw $0x8,%%zmm5,%%zmm5 \n" - "vmovdqu64 %4,%%zmm4 \n" - "sub %1,%2 \n" - - LABELALIGN - "1: \n" - "vmovdqu8 (%0),%%zmm0 \n" - "vmovdqu8 0x40(%0),%%zmm1 \n" - "lea 0x80(%0),%0 \n" - "vpsrlw $0x8,%%zmm0,%%zmm2 \n" - "vpsrlw $0x8,%%zmm1,%%zmm3 \n" - "vpandd %%zmm5,%%zmm0,%%zmm0 \n" - "vpandd %%zmm5,%%zmm1,%%zmm1 \n" - "vpackuswb %%zmm1,%%zmm0,%%zmm0 \n" - "vpackuswb %%zmm3,%%zmm2,%%zmm2 \n" - "vpermq %%zmm0,%%zmm4,%%zmm0 \n" - "vpermq %%zmm2,%%zmm4,%%zmm2 \n" - "vmovdqu8 %%zmm0,(%1) \n" - "vmovdqu8 %%zmm2,0x00(%1,%2,1) \n" - "lea 0x40(%1),%1 \n" - "sub $0x40,%3 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_uv), // %0 - 
"+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "m"(kSplitUVPermute) // %4 - : "memory", "cc", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5"); -} -#endif // HAS_SPLITUVROW_AVX512BW - #ifdef HAS_SPLITUVROW_SSE2 void SplitUVRow_SSE2(const uint8_t* src_uv, uint8_t* dst_u, @@ -5182,20 +5074,20 @@ void MergeUVRow_AVX512BW(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { - asm volatile("sub %0,%1 \n" + asm volatile("sub %0,%1 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%0),%%zmm0 \n" - "vpmovzxbw 0x00(%0,%1,1),%%zmm1 \n" - "lea 0x20(%0),%0 \n" - "vpsllw $0x8,%%zmm1,%%zmm1 \n" - "vporq %%zmm0,%%zmm1,%%zmm2 \n" - "vmovdqu64 %%zmm2,(%2) \n" - "lea 0x40(%2),%2 \n" - "sub $0x20,%3 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%0),%%zmm0 \n" + "vpmovzxbw 0x00(%0,%1,1),%%zmm1 \n" + "lea 0x20(%0),%0 \n" + "vpsllw $0x8,%%zmm1,%%zmm1 \n" + "vporq %%zmm0,%%zmm1,%%zmm2 \n" + "vmovdqu64 %%zmm2,(%2) \n" + "lea 0x40(%2),%2 \n" + "sub $0x20,%3 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 @@ -5210,20 +5102,20 @@ void MergeUVRow_AVX2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { - asm volatile("sub %0,%1 \n" + asm volatile("sub %0,%1 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%0),%%ymm0 \n" - "vpmovzxbw 0x00(%0,%1,1),%%ymm1 \n" - "lea 0x10(%0),%0 \n" - "vpsllw $0x8,%%ymm1,%%ymm1 \n" - "vpor %%ymm0,%%ymm1,%%ymm2 \n" - "vmovdqu %%ymm2,(%2) \n" - "lea 0x20(%2),%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%0),%%ymm0 \n" + "vpmovzxbw 0x00(%0,%1,1),%%ymm1 \n" + "lea 0x10(%0),%0 \n" + "vpsllw $0x8,%%ymm1,%%ymm1 \n" + "vpor %%ymm0,%%ymm1,%%ymm2 \n" + "vmovdqu %%ymm2,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 @@ -5238,21 +5130,21 @@ void MergeUVRow_SSE2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { - asm 
volatile("sub %0,%1 \n" + asm volatile("sub %0,%1 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu 0x00(%0,%1,1),%%xmm1 \n" - "lea 0x10(%0),%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqu %%xmm0,(%2) \n" - "movdqu %%xmm2,0x10(%2) \n" - "lea 0x20(%2),%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x00(%0,%1,1),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "movdqa %%xmm0,%%xmm2 \n" + "punpcklbw %%xmm1,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm2 \n" + "movdqu %%xmm0,(%2) \n" + "movdqu %%xmm2,0x10(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x10,%3 \n" + "jg 1b \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 @@ -5487,24 +5379,24 @@ void Convert16To8Row_AVX512BW(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) { - asm volatile("vpbroadcastw %3,%%zmm2 \n" + asm volatile("vpbroadcastw %3,%%zmm2 \n" // 64 pixels per loop. LABELALIGN - "1: \n" - "vmovups (%0),%%zmm0 \n" - "vmovups 0x40(%0),%%zmm1 \n" - "add $0x80,%0 \n" - "vpmulhuw %%zmm2,%%zmm0,%%zmm0 \n" - "vpmulhuw %%zmm2,%%zmm1,%%zmm1 \n" - "vpmovuswb %%zmm0,%%ymm0 \n" - "vpmovuswb %%zmm1,%%ymm1 \n" - "vmovups %%ymm0,(%1) \n" - "vmovups %%ymm1,0x20(%1) \n" - "add $0x40,%1 \n" - "sub $0x40,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovups (%0),%%zmm0 \n" + "vmovups 0x40(%0),%%zmm1 \n" + "add $0x80,%0 \n" + "vpmulhuw %%zmm2,%%zmm0,%%zmm0 \n" + "vpmulhuw %%zmm2,%%zmm1,%%zmm1 \n" + "vpmovuswb %%zmm0,%%ymm0 \n" + "vpmovuswb %%zmm1,%%ymm1 \n" + "vmovups %%ymm0,(%1) \n" + "vmovups %%ymm1,0x20(%1) \n" + "add $0x40,%1 \n" + "sub $0x40,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -5554,24 +5446,24 @@ void Convert8To16Row_AVX2(const uint8_t* src_y, int scale, int width) { const int shift = __builtin_clz(scale) - 15; - asm volatile("vmovd %3,%%xmm2 \n" + asm volatile("vmovd %3,%%xmm2 \n" // 32 pixels per loop. 
LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n" - "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrlw %%xmm2,%%ymm0,%%ymm0 \n" - "vpsrlw %%xmm2,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0,(%1) \n" - "vmovdqu %%ymm1,0x20(%1) \n" - "add $0x40,%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vpermq $0xd8,%%ymm0,%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n" + "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" + "vpsrlw %%xmm2,%%ymm0,%%ymm0 \n" + "vpsrlw %%xmm2,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "add $0x40,%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 @@ -6352,7 +6244,7 @@ void MergeXR30Row_AVX2(const uint16_t* src_r, #if defined(__i386__) : "m"(shift) // %5 #else - : "rm"(shift) // %5 + : "rm"(shift) // %5 #endif : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } @@ -6688,7 +6580,7 @@ void CopyRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width) { // Multiple of 1. void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep movsb \n" + asm volatile("rep movsb \n" : "+S"(src), // %0 "+D"(dst), // %1 "+c"(width_tmp) // %2 @@ -6898,7 +6790,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width >> 2); const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. 
- asm volatile("rep stosl \n" + asm volatile("rep stosl \n" : "+D"(dst), // %0 "+c"(width_tmp) // %1 : "a"(v32) // %2 @@ -6907,7 +6799,7 @@ void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep stosb \n" + asm volatile("rep stosb \n" : "+D"(dst), // %0 "+c"(width_tmp) // %1 : "a"(v8) // %2 @@ -6916,7 +6808,7 @@ void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) { size_t width_tmp = (size_t)(width); - asm volatile("rep stosl \n" + asm volatile("rep stosl \n" : "+D"(dst_argb), // %0 "+c"(width_tmp) // %1 : "a"(v32) // %2 @@ -8077,28 +7969,28 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { - asm volatile("pxor %%xmm5,%%xmm5 \n" + asm volatile("pxor %%xmm5,%%xmm5 \n" // 4 pixel loop. LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "lea 0x10(%0),%0 \n" - "movdqu (%1),%%xmm2 \n" - "lea 0x10(%1),%1 \n" - "movdqu %%xmm0,%%xmm1 \n" - "movdqu %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0,(%2) \n" - "lea 0x10(%2),%2 \n" - "sub $0x4,%3 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "lea 0x10(%0),%0 \n" + "movdqu (%1),%%xmm2 \n" + "lea 0x10(%1),%1 \n" + "movdqu %%xmm0,%%xmm1 \n" + "movdqu %%xmm2,%%xmm3 \n" + "punpcklbw %%xmm0,%%xmm0 \n" + "punpckhbw %%xmm1,%%xmm1 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" + "pmulhuw %%xmm2,%%xmm0 \n" + "pmulhuw %%xmm3,%%xmm1 \n" + "packuswb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,(%2) \n" + "lea 0x10(%2),%2 \n" + "sub $0x4,%3 \n" + "jg 1b \n" : "+r"(src_argb), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 @@ -8114,27 +8006,27 @@ void ARGBMultiplyRow_AVX2(const uint8_t* 
src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { - asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n" + asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n" // 4 pixel loop. LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm1 \n" - "lea 0x20(%0),%0 \n" - "vmovdqu (%1),%%ymm3 \n" - "lea 0x20(%1),%1 \n" - "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n" - "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n" - "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" - "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" - "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" - "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" - "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,(%2) \n" - "lea 0x20(%2),%2 \n" - "sub $0x8,%3 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu (%0),%%ymm1 \n" + "lea 0x20(%0),%0 \n" + "vmovdqu (%1),%%ymm3 \n" + "lea 0x20(%1),%1 \n" + "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n" + "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n" + "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" + "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" + "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" + "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" + "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" + "vmovdqu %%ymm0,(%2) \n" + "lea 0x20(%2),%2 \n" + "sub $0x8,%3 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 @@ -8796,6 +8688,87 @@ void ARGBAffineRow_SSE2(const uint8_t* src_argb, } #endif // HAS_ARGBAFFINEROW_SSE2 +#ifdef HAS_INTERPOLATEROW_SSSE3 +// Bilinear filter 16x2 -> 16x1 +void InterpolateRow_SSSE3(uint8_t* dst_ptr, + const uint8_t* src_ptr, + ptrdiff_t src_stride, + int width, + int source_y_fraction) { + asm volatile( + "sub %1,%0 \n" + "cmp $0x0,%3 \n" + "je 100f \n" + "cmp $0x80,%3 \n" + "je 50f \n" + + "movd %3,%%xmm0 \n" + "neg %3 \n" + "add $0x100,%3 \n" + "movd %3,%%xmm5 \n" + "punpcklbw %%xmm0,%%xmm5 \n" + "punpcklwd %%xmm5,%%xmm5 \n" + "pshufd $0x0,%%xmm5,%%xmm5 \n" + "mov $0x80808080,%%eax \n" + "movd %%eax,%%xmm4 \n" + "pshufd $0x0,%%xmm4,%%xmm4 \n" + + // General purpose row blend. 
+ LABELALIGN + "1: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu 0x00(%1,%4,1),%%xmm2 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "psubb %%xmm4,%%xmm0 \n" + "psubb %%xmm4,%%xmm1 \n" + "movdqa %%xmm5,%%xmm2 \n" + "movdqa %%xmm5,%%xmm3 \n" + "pmaddubsw %%xmm0,%%xmm2 \n" + "pmaddubsw %%xmm1,%%xmm3 \n" + "paddw %%xmm4,%%xmm2 \n" + "paddw %%xmm4,%%xmm3 \n" + "psrlw $0x8,%%xmm2 \n" + "psrlw $0x8,%%xmm3 \n" + "packuswb %%xmm3,%%xmm2 \n" + "movdqu %%xmm2,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "jmp 99f \n" + + // Blend 50 / 50. + LABELALIGN + "50: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu 0x00(%1,%4,1),%%xmm1 \n" + "pavgb %%xmm1,%%xmm0 \n" + "movdqu %%xmm0,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 50b \n" + "jmp 99f \n" + + // Blend 100 / 0 - Copy row unchanged. + LABELALIGN + "100: \n" + "movdqu (%1),%%xmm0 \n" + "movdqu %%xmm0,0x00(%1,%0,1) \n" + "lea 0x10(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 100b \n" + + "99: \n" + : "+r"(dst_ptr), // %0 + "+r"(src_ptr), // %1 + "+rm"(width), // %2 + "+r"(source_y_fraction) // %3 + : "r"(src_stride) // %4 + : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); +} +#endif // HAS_INTERPOLATEROW_SSSE3 + #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 void InterpolateRow_AVX2(uint8_t* dst_ptr, @@ -8874,107 +8847,26 @@ void InterpolateRow_AVX2(uint8_t* dst_ptr, } #endif // HAS_INTERPOLATEROW_AVX2 -#ifdef HAS_INTERPOLATEROW_16_AVX2 -// Bilinear filter 16x2 -> 16x1 -void InterpolateRow_16_AVX2(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction) { - asm volatile( - "sub %1,%0 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x80,%3 \n" - "je 50f \n" - - "vmovd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x100,%3 \n" - "vmovd %3,%%xmm5 \n" - "vpunpcklwd %%xmm0,%%xmm5,%%xmm5 \n" - "vpbroadcastd %%xmm5,%%ymm5 \n" - "mov $0x80008000,%%eax \n" // 0x80008000 used to 
bias - // unsigned words to - // signed range for - // vpmaddwd. - "vmovd %%eax,%%xmm4 \n" - "vbroadcastss %%xmm4,%%ymm4 \n" - "mov $8388736,%%eax \n" // 32768 * 256 + 128 - // rounding constant. - "vmovd %%eax,%%xmm3 \n" - "vbroadcastss %%xmm3,%%ymm3 \n" - - LABELALIGN - "1: \n" - "vmovdqu (%1),%%ymm0 \n" - "vmovdqu (%1,%4,2),%%ymm1 \n" - "vpunpckhwd %%ymm1,%%ymm0,%%ymm2 \n" - "vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" - "vpsubw %%ymm4,%%ymm2,%%ymm2 \n" - "vpsubw %%ymm4,%%ymm0,%%ymm0 \n" - "vpmaddwd %%ymm5,%%ymm2,%%ymm2 \n" - "vpmaddwd %%ymm5,%%ymm0,%%ymm0 \n" - "vpaddd %%ymm3,%%ymm2,%%ymm2 \n" - "vpaddd %%ymm3,%%ymm0,%%ymm0 \n" - "vpsrad $0x8,%%ymm2,%%ymm2 \n" - "vpsrad $0x8,%%ymm0,%%ymm0 \n" - "vpackusdw %%ymm2,%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,0x00(%1,%0,1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "jmp 99f \n" - - "50: \n" LABELALIGN - "2: \n" - "vmovdqu (%1),%%ymm0 \n" - "vpavgw (%1,%4,2),%%ymm0,%%ymm0 \n" - "vmovdqu %%ymm0,0x00(%1,%0,1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 2b \n" - "jmp 99f \n" - - "100: \n" LABELALIGN - "3: \n" - "vmovdqu (%1),%%ymm0 \n" - "vmovdqu %%ymm0,0x00(%1,%0,1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 3b \n" - - "99: \n" - "vzeroupper \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(width), // %2 - "+r"(source_y_fraction) // %3 - : "r"(src_stride) // %4 - : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); -} -#endif // HAS_INTERPOLATEROW_16_AVX2 - #ifdef HAS_ARGBSHUFFLEROW_SSSE3 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { - asm volatile("movdqu (%3),%%xmm5 \n" + asm volatile("movdqu (%3),%%xmm5 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu 0x10(%0),%%xmm1 \n" - "lea 0x20(%0),%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu %%xmm0,(%1) \n" - "movdqu %%xmm1,0x10(%1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x8,%2 \n" + "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -8989,21 +8881,21 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { - asm volatile("vbroadcasti128 (%3),%%ymm5 \n" + asm volatile("vbroadcasti128 (%3),%%ymm5 \n" LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vmovdqu 0x20(%0),%%ymm1 \n" - "lea 0x40(%0),%0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0,(%1) \n" - "vmovdqu %%ymm1,0x20(%1) \n" - "lea 0x40(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -9012,59 +8904,30 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb, } #endif // HAS_ARGBSHUFFLEROW_AVX2 -#ifdef HAS_ARGBSHUFFLEROW_AVX512BW -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
-void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width) { - asm volatile("vbroadcasti32x4 (%3),%%zmm5 \n" - - LABELALIGN - "1: \n" - "vmovdqu8 (%0),%%zmm0 \n" - "vmovdqu8 0x40(%0),%%zmm1 \n" - "lea 0x80(%0),%0 \n" - "vpshufb %%zmm5,%%zmm0,%%zmm0 \n" - "vpshufb %%zmm5,%%zmm1,%%zmm1 \n" - "vmovdqu8 %%zmm0,(%1) \n" - "vmovdqu8 %%zmm1,0x40(%1) \n" - "lea 0x80(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(shuffler) // %3 - : "memory", "cc", "xmm0", "xmm1", "xmm5"); -} -#endif // HAS_ARGBSHUFFLEROW_AVX512BW - #ifdef HAS_I422TOYUY2ROW_SSE2 void I422ToYUY2Row_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { - asm volatile("sub %1,%2 \n" + asm volatile("sub %1,%2 \n" LABELALIGN - "1: \n" - "movq (%1),%%xmm2 \n" - "movq 0x00(%1,%2,1),%%xmm1 \n" - "add $0x8,%1 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqu (%0),%%xmm0 \n" - "add $0x10,%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqu %%xmm0,(%3) \n" - "movdqu %%xmm1,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "add $0x10,%0 \n" + "movdqa %%xmm0,%%xmm1 \n" + "punpcklbw %%xmm2,%%xmm0 \n" + "punpckhbw %%xmm2,%%xmm1 \n" + "movdqu %%xmm0,(%3) \n" + "movdqu %%xmm1,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -9081,24 +8944,24 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { - asm volatile("sub %1,%2 \n" + asm volatile("sub %1,%2 \n" LABELALIGN - "1: \n" - "movq (%1),%%xmm2 \n" - "movq 0x00(%1,%2,1),%%xmm1 \n" - "add $0x8,%1 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqu 
(%0),%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "add $0x10,%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - "movdqu %%xmm1,(%3) \n" - "movdqu %%xmm2,0x10(%3) \n" - "lea 0x20(%3),%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" + "1: \n" + "movq (%1),%%xmm2 \n" + "movq 0x00(%1,%2,1),%%xmm1 \n" + "add $0x8,%1 \n" + "punpcklbw %%xmm1,%%xmm2 \n" + "movdqu (%0),%%xmm0 \n" + "movdqa %%xmm2,%%xmm1 \n" + "add $0x10,%0 \n" + "punpcklbw %%xmm0,%%xmm1 \n" + "punpckhbw %%xmm0,%%xmm2 \n" + "movdqu %%xmm1,(%3) \n" + "movdqu %%xmm2,0x10(%3) \n" + "lea 0x20(%3),%3 \n" + "sub $0x10,%4 \n" + "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -9115,27 +8978,27 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { - asm volatile("sub %1,%2 \n" + asm volatile("sub %1,%2 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%1),%%ymm1 \n" - "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" - "add $0x10,%1 \n" - "vpsllw $0x8,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm2,%%ymm2 \n" - "vmovdqu (%0),%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" - "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" - "vextractf128 $0x0,%%ymm1,(%3) \n" - "vextractf128 $0x0,%%ymm2,0x10(%3) \n" - "vextractf128 $0x1,%%ymm1,0x20(%3) \n" - "vextractf128 $0x1,%%ymm2,0x30(%3) \n" - "lea 0x40(%3),%3 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" + "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -9152,27 +9015,27 @@ void 
I422ToUYVYRow_AVX2(const uint8_t* src_y, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { - asm volatile("sub %1,%2 \n" + asm volatile("sub %1,%2 \n" LABELALIGN - "1: \n" - "vpmovzxbw (%1),%%ymm1 \n" - "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" - "add $0x10,%1 \n" - "vpsllw $0x8,%%ymm2,%%ymm2 \n" - "vpor %%ymm1,%%ymm2,%%ymm2 \n" - "vmovdqu (%0),%%ymm0 \n" - "add $0x20,%0 \n" - "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" - "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" - "vextractf128 $0x0,%%ymm1,(%3) \n" - "vextractf128 $0x0,%%ymm2,0x10(%3) \n" - "vextractf128 $0x1,%%ymm1,0x20(%3) \n" - "vextractf128 $0x1,%%ymm2,0x30(%3) \n" - "lea 0x40(%3),%3 \n" - "sub $0x20,%4 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vpmovzxbw (%1),%%ymm1 \n" + "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" + "add $0x10,%1 \n" + "vpsllw $0x8,%%ymm2,%%ymm2 \n" + "vpor %%ymm1,%%ymm2,%%ymm2 \n" + "vmovdqu (%0),%%ymm0 \n" + "add $0x20,%0 \n" + "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" + "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" + "vextractf128 $0x0,%%ymm1,(%3) \n" + "vextractf128 $0x0,%%ymm2,0x10(%3) \n" + "vextractf128 $0x1,%%ymm1,0x20(%3) \n" + "vextractf128 $0x1,%%ymm2,0x30(%3) \n" + "lea 0x40(%3),%3 \n" + "sub $0x20,%4 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 @@ -9188,47 +9051,47 @@ void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) { - asm volatile("pxor %%xmm3,%%xmm3 \n" + asm volatile("pxor %%xmm3,%%xmm3 \n" // 2 pixel loop. 
LABELALIGN - "1: \n" - "movq (%0),%%xmm0 \n" - "lea 0x8(%0),%0 \n" - "punpcklbw %%xmm3,%%xmm0 \n" - "movdqa %%xmm0,%%xmm4 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm4,%%xmm4 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm4,%%xmm5 \n" - "mulps 0x10(%3),%%xmm0 \n" - "mulps 0x10(%3),%%xmm4 \n" - "addps (%3),%%xmm0 \n" - "addps (%3),%%xmm4 \n" - "movdqa %%xmm1,%%xmm2 \n" - "movdqa %%xmm5,%%xmm6 \n" - "mulps %%xmm1,%%xmm2 \n" - "mulps %%xmm5,%%xmm6 \n" - "mulps %%xmm2,%%xmm1 \n" - "mulps %%xmm6,%%xmm5 \n" - "mulps 0x20(%3),%%xmm2 \n" - "mulps 0x20(%3),%%xmm6 \n" - "mulps 0x30(%3),%%xmm1 \n" - "mulps 0x30(%3),%%xmm5 \n" - "addps %%xmm2,%%xmm0 \n" - "addps %%xmm6,%%xmm4 \n" - "addps %%xmm1,%%xmm0 \n" - "addps %%xmm5,%%xmm4 \n" - "cvttps2dq %%xmm0,%%xmm0 \n" - "cvttps2dq %%xmm4,%%xmm4 \n" - "packuswb %%xmm4,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0,(%1) \n" - "lea 0x8(%1),%1 \n" - "sub $0x2,%2 \n" - "jg 1b \n" + "1: \n" + "movq (%0),%%xmm0 \n" + "lea 0x8(%0),%0 \n" + "punpcklbw %%xmm3,%%xmm0 \n" + "movdqa %%xmm0,%%xmm4 \n" + "punpcklwd %%xmm3,%%xmm0 \n" + "punpckhwd %%xmm3,%%xmm4 \n" + "cvtdq2ps %%xmm0,%%xmm0 \n" + "cvtdq2ps %%xmm4,%%xmm4 \n" + "movdqa %%xmm0,%%xmm1 \n" + "movdqa %%xmm4,%%xmm5 \n" + "mulps 0x10(%3),%%xmm0 \n" + "mulps 0x10(%3),%%xmm4 \n" + "addps (%3),%%xmm0 \n" + "addps (%3),%%xmm4 \n" + "movdqa %%xmm1,%%xmm2 \n" + "movdqa %%xmm5,%%xmm6 \n" + "mulps %%xmm1,%%xmm2 \n" + "mulps %%xmm5,%%xmm6 \n" + "mulps %%xmm2,%%xmm1 \n" + "mulps %%xmm6,%%xmm5 \n" + "mulps 0x20(%3),%%xmm2 \n" + "mulps 0x20(%3),%%xmm6 \n" + "mulps 0x30(%3),%%xmm1 \n" + "mulps 0x30(%3),%%xmm5 \n" + "addps %%xmm2,%%xmm0 \n" + "addps %%xmm6,%%xmm4 \n" + "addps %%xmm1,%%xmm0 \n" + "addps %%xmm5,%%xmm4 \n" + "cvttps2dq %%xmm0,%%xmm0 \n" + "cvttps2dq %%xmm4,%%xmm4 \n" + "packuswb %%xmm4,%%xmm0 \n" + "packuswb %%xmm0,%%xmm0 \n" + "movq %%xmm0,(%1) \n" + "lea 0x8(%1),%1 \n" + "sub $0x2,%2 \n" + "jg 1b \n" : 
"+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 @@ -9316,7 +9179,7 @@ void HalfFloatRow_AVX2(const uint16_t* src, #if defined(__x86_64__) : "x"(scale) // %3 #else - : "m"(scale) // %3 + : "m"(scale) // %3 #endif : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); } @@ -9354,7 +9217,7 @@ void HalfFloatRow_F16C(const uint16_t* src, #if defined(__x86_64__) : "x"(scale) // %3 #else - : "m"(scale) // %3 + : "m"(scale) // %3 #endif : "memory", "cc", "xmm2", "xmm3", "xmm4"); } @@ -9688,20 +9551,20 @@ static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u, // Convert UV plane of NV12 to VU of NV21. void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) { - asm volatile("movdqu %3,%%xmm5 \n" + asm volatile("movdqu %3,%%xmm5 \n" LABELALIGN - "1: \n" - "movdqu (%0),%%xmm0 \n" - "movdqu 0x10(%0),%%xmm1 \n" - "lea 0x20(%0),%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu %%xmm0,(%1) \n" - "movdqu %%xmm1,0x10(%1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm0 \n" + "movdqu 0x10(%0),%%xmm1 \n" + "lea 0x20(%0),%0 \n" + "pshufb %%xmm5,%%xmm0 \n" + "pshufb %%xmm5,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src_uv), // %0 "+r"(dst_vu), // %1 "+r"(width) // %2 @@ -9712,21 +9575,21 @@ void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) { #ifdef HAS_SWAPUVROW_AVX2 void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) { - asm volatile("vbroadcasti128 %3,%%ymm5 \n" + asm volatile("vbroadcasti128 %3,%%ymm5 \n" LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm0 \n" - "vmovdqu 0x20(%0),%%ymm1 \n" - "lea 0x40(%0),%0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" - "vmovdqu %%ymm0,(%1) \n" - "vmovdqu %%ymm1,0x20(%1) \n" - "lea 0x40(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu (%0),%%ymm0 \n" + "vmovdqu 
0x20(%0),%%ymm1 \n" + "lea 0x40(%0),%0 \n" + "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" + "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_uv), // %0 "+r"(dst_vu), // %1 "+r"(width) // %2 diff --git a/source/row_lasx.cc b/source/row_lasx.cc index e0802c15e..94cb44ed1 100644 --- a/source/row_lasx.cc +++ b/source/row_lasx.cc @@ -2027,12 +2027,10 @@ struct ArgbConstants { // R * 0.2990 coefficient = 77 // Add 0.5 = 0x80 static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; + 128, + 0}; -static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, - 128, - 0}; +static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; // RGB to BT.601 coefficients // B * 0.1016 coefficient = 25 @@ -2041,19 +2039,19 @@ static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, // Add 16.5 = 0x1080 static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; + 0x1080, + 0}; static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; + 0x1080, + 0}; #endif // ArgbConstants // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
void ARGBToYMatrixRow_LASX(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7}; asm volatile( "xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants @@ -2218,14 +2216,18 @@ static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba, "xvst $xr10, %1, 0 \n\t" "addi.d %1, %1, 32 \n\t" "bnez %2, 1b \n\t" - : "+&r"(src_rgba), // %0 - "+&r"(dst_y), // %1 - "+&r"(width) // %2 - : "r"(c), // %3 - "r"(shuff) // %4 + : "+&r"(src_rgba), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(c), // %3 + "r"(shuff) // %4 : "memory"); } + + + + void ARGBToUVJRow_LASX(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, diff --git a/source/row_lsx.cc b/source/row_lsx.cc index 3e6d5154c..41689578a 100644 --- a/source/row_lsx.cc +++ b/source/row_lsx.cc @@ -2812,12 +2812,10 @@ struct ArgbConstants { // R * 0.2990 coefficient = 77 // Add 0.5 = 0x80 static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - 128, - 0}; + 128, + 0}; -static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, - 128, - 0}; +static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0}; // RGB to BT.601 coefficients // B * 0.1016 coefficient = 25 @@ -2826,19 +2824,19 @@ static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, // Add 16.5 = 0x1080 static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - 0x1080, - 0}; + 0x1080, + 0}; static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, - 0x1080, - 0}; + 0x1080, + 0}; #endif // ArgbConstants // ARGB expects first 3 values to contain RGB and 4th value is ignored. 
void ARGBToYMatrixRow_LSX(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { asm volatile( "vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants "vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants @@ -2989,14 +2987,18 @@ static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba, "vst $vr10, %1, 0 \n\t" "addi.d %1, %1, 16 \n\t" "bnez %2, 1b \n\t" - : "+&r"(src_rgba), // %0 - "+&r"(dst_y), // %1 - "+&r"(width) // %2 - : "r"(c), // %3 - "r"(shuff) // %4 + : "+&r"(src_rgba), // %0 + "+&r"(dst_y), // %1 + "+&r"(width) // %2 + : "r"(c), // %3 + "r"(shuff) // %4 : "memory"); } + + + + // undef for unified sources build #undef YUVTORGB_SETUP #undef READYUV422_D diff --git a/source/row_neon.cc b/source/row_neon.cc index 08608005f..257398bbe 100644 --- a/source/row_neon.cc +++ b/source/row_neon.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/convert_from_argb.h" // For ArgbConstants #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" // For ArgbConstants #ifdef __cplusplus namespace libyuv { @@ -272,7 +272,7 @@ void I422ToRGBARow_NEON(const uint8_t* src_y, "subs %[width], %[width], #8 \n" // YUVTORGB // RGBTORGB8 // - STORERGBA // + STORERGBA // "bgt 1b \n" : [src_y] "+r"(src_y), // %[src_y] [src_u] "+r"(src_u), // %[src_u] @@ -325,8 +325,9 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y, YUVTORGB_SETUP "vmov.u8 d6, #255 \n" "1: \n" // - READYUV422 "subs %[width], %[width], #8 \n" YUVTORGB - RGBTORGB8 ARGBTORGB565 + READYUV422 + "subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8 + ARGBTORGB565 "vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565. 
"bgt 1b \n" : [src_y] "+r"(src_y), // %[src_y] @@ -1847,54 +1848,45 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { asm volatile( - "vld1.8 {d24}, [%4] \n" // load kRGBToU - "vld1.8 {d25}, [%5] \n" // load kRGBToV - "vld1.16 {d26[0]}, [%6] \n" // load kAddUV[0] - "vmovl.s8 q10, d24 \n" // U coeffs (8 shorts) - "vmovl.s8 q11, d25 \n" // V coeffs (8 shorts) - "vdup.16 q6, d26[0] \n" // bias + "vld1.8 {d16}, [%4] \n" // load kRGBToU + "vld1.8 {d17}, [%5] \n" // load kRGBToV + "vld1.16 {d18[0]}, [%6] \n" // load kAddUV[0] + "vabs.s8 d16, d16 \n" // BU, GU, RU + "vabs.s8 d17, d17 \n" // BV, GV, RV + "vdup.8 d20, d16[0] \n" // BU + "vdup.8 d21, d16[1] \n" // GU + "vdup.8 d22, d16[2] \n" // RU + "vdup.8 d23, d17[0] \n" // BV + "vdup.8 d24, d17[1] \n" // GV + "vdup.8 d25, d17[2] \n" // RV + "vdup.16 q15, d18[0] \n" // kAddUV + "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %3, %3, #8 \n" // 8 processed per loop. + "vmull.u8 q2, d0, d20 \n" // B * BU + "vmlsl.u8 q2, d1, d21 \n" // - G * GU + "vmlsl.u8 q2, d2, d22 \n" // - R * RU - "vmovl.u8 q4, d0 \n" // B - "vmovl.u8 q5, d1 \n" // G - "vmovl.u8 q7, d2 \n" // R - "vmovl.u8 q8, d3 \n" // A + "vmull.u8 q3, d2, d25 \n" // R * RV + "vmlsl.u8 q3, d1, d24 \n" // - G * GV + "vmlsl.u8 q3, d0, d23 \n" // - B * BV - "vdup.16 q12, d20[0] \n" - "vmul.s16 q2, q4, q12 \n" // U = B * U0 - "vdup.16 q12, d20[1] \n" - "vmla.s16 q2, q5, q12 \n" // U += G * U1 - "vdup.16 q12, d20[2] \n" - "vmla.s16 q2, q7, q12 \n" // U += R * U2 - "vdup.16 q12, d20[3] \n" - "vmla.s16 q2, q8, q12 \n" // U += A * U3 - - "vdup.16 q12, d22[0] \n" - "vmul.s16 q3, q4, q12 \n" // V = B * V0 - "vdup.16 q12, d22[1] \n" - "vmla.s16 q3, q5, q12 \n" // V += G * V1 - "vdup.16 q12, d22[2] \n" - "vmla.s16 q3, q7, q12 \n" // V += R * V2 - "vdup.16 q12, d22[3] \n" - "vmla.s16 q3, q8, q12 \n" // V += A * V3 - - "vsubhn.s16 d0, q6, q2 \n" // 128.0 - U - "vsubhn.s16 d1, q6, q3 \n" // 128.0 - V + 
"vaddhn.u16 d0, q2, q15 \n" // signed -> unsigned + "vaddhn.u16 d1, q3, q15 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : "r"(&c->kRGBToU), // %4 - "r"(&c->kRGBToV), // %5 - "r"(&c->kAddUV) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", - "q10", "q11", "q12"); + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"(&c->kRGBToU), // %4 + "r"(&c->kRGBToV), // %5 + "r"(&c->kAddUV) // %6 + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15"); } void ARGBToUV444Row_NEON(const uint8_t* src_argb, @@ -1911,6 +1903,7 @@ void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants); } + // clang-format off // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. #define RGBTOUV(QB, QG, QR) \ @@ -1932,68 +1925,61 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { const uint8_t* src_argb_1 = src_argb + src_stride_argb; - asm volatile( - "vld1.8 {d24}, [%5] \n" // load kRGBToU (8 bytes, - // only 4 used) - "vld1.8 {d25}, [%6] \n" // load kRGBToV - "vmovl.s8 q14, d24 \n" // U coeffs in d28 - "vmovl.s8 q15, d25 \n" // V coeffs in d30 - "vmov.u16 q11, #0x8000 \n" // 128.0 bias + asm volatile ( + "vld1.8 {d18}, [%5] \n" // load kRGBToU + "vld1.8 {d19}, [%6] \n" // load kRGBToV + "vmovl.s8 q8, d18 \n" // U coeffs in q8 (d16, d17) + "vmovl.s8 q9, d19 \n" // V coeffs in q9 (d18, d19) + "vdup.16 q10, d16[0] \n" // U0 + "vdup.16 q11, d16[1] \n" // U1 + "vdup.16 q12, d16[2] \n" // U2 + "vdup.16 q13, d18[0] \n" // V0 + "vdup.16 q14, d18[1] \n" // V1 + "vdup.16 q15, d18[2] \n" // V2 "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! 
\n" // load next 8 ARGB - // pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. "subs %4, %4, #16 \n" // 16 processed per loop. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" - "vpadal.u8 q0, q4 \n" // B - "vpadal.u8 q1, q5 \n" // G - "vpadal.u8 q2, q6 \n" // R - "vpadal.u8 q3, q7 \n" // A + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. + "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. + "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #2 \n" // average of 4 "vrshr.u16 q1, q1, #2 \n" "vrshr.u16 q2, q2, #2 \n" - "vrshr.u16 q3, q3, #2 \n" - "vdup.16 q12, d28[0] \n" - "vmul.s16 q8, q0, q12 \n" // U = B * U0 - "vdup.16 q12, d28[1] \n" - "vmla.s16 q8, q1, q12 \n" // U += G * U1 - "vdup.16 q12, d28[2] \n" + "vmov.u16 q3, #0x8000 \n" // 128.0 + + "vmul.s16 q8, q0, q10 \n" // U = B * U0 + "vmla.s16 q8, q1, q11 \n" // U += G * U1 "vmla.s16 q8, q2, q12 \n" // U += R * U2 - "vdup.16 q12, d28[3] \n" - "vmla.s16 q8, q3, q12 \n" // U += A * U3 - "vdup.16 q12, d30[0] \n" - "vmul.s16 q9, q0, q12 \n" // V = B * V0 - "vdup.16 q12, d30[1] \n" - "vmla.s16 q9, q1, q12 \n" // V += G * V1 - "vdup.16 q12, d30[2] \n" - "vmla.s16 q9, q2, q12 \n" // V += R * V2 - "vdup.16 q12, d30[3] \n" - "vmla.s16 q9, q3, q12 \n" // V += A * V3 + "vmul.s16 q9, q0, q13 \n" // V = B * V0 + "vmla.s16 q9, q1, q14 \n" // V += G * V1 + "vmla.s16 q9, q2, q15 \n" // V += R * V2 - "vsubhn.s16 d0, q11, q8 \n" // 128.0 - U - "vsubhn.s16 d1, q11, q9 \n" // 128.0 - V + "vsubhn.s16 d0, q3, q8 \n" // 128.0 - U + "vsubhn.s16 d1, q3, q9 \n" // 128.0 - V "vst1.8 {d0}, [%2]! 
\n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_argb_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : "r"(&c->kRGBToU), // %5 - "r"(&c->kRGBToV) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", - "q9", "q11", "q12", "q14", "q15"); + : "+r"(src_argb), // %0 + "+r"(src_argb_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : "r"(&c->kRGBToU), // %5 + "r"(&c->kRGBToV) // %6 + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); } void ARGBToUVRow_NEON(const uint8_t* src_argb, @@ -2226,8 +2212,44 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width, - &kBgraI601Constants); + asm volatile ( + "add %1, %0, %1 \n" // src_stride + src_bgra + "vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient + "vmov.s16 q11, #74 \n" // UG -0.5781 coefficient + "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient + "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient + "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8000 \n" // 128.0 + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. + "subs %4, %4, #16 \n" // 16 processed per loop. + "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. + "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. + "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. + "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts. 
+ + "vrshr.u16 q1, q1, #2 \n" // average of 4 + "vrshr.u16 q2, q2, #2 \n" + "vrshr.u16 q3, q3, #2 \n" + + RGBTOUV(q3, q2, q1) + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. + "bgt 1b \n" + : "+r"(src_bgra), // %0 + "+r"(src_stride_bgra), // %1 + "+r"(dst_u), // %2- + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); } void ABGRToUVRow_NEON(const uint8_t* src_abgr, @@ -2235,8 +2257,44 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width, - &kAbgrI601Constants); + asm volatile ( + "add %1, %0, %1 \n" // src_stride + src_abgr + "vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient + "vmov.s16 q11, #74 \n" // UG -0.5781 coefficient + "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient + "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient + "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8000 \n" // 128.0 + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. + "subs %4, %4, #16 \n" // 16 processed per loop. + "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. + "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. + "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. + "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. + + "vrshr.u16 q0, q0, #2 \n" // average of 4 + "vrshr.u16 q1, q1, #2 \n" + "vrshr.u16 q2, q2, #2 \n" + + RGBTOUV(q2, q1, q0) + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
+ "bgt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(src_stride_abgr), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); } void RGBAToUVRow_NEON(const uint8_t* src_rgba, @@ -2244,8 +2302,44 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width, - &kRgbaI601Constants); + asm volatile ( + "add %1, %0, %1 \n" // src_stride + src_rgba + "vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient + "vmov.s16 q11, #74 \n" // UG -0.5781 coefficient + "vmov.s16 q12, #38 \n" // UR -0.2969 coefficient + "vmov.s16 q13, #18 \n" // VB -0.1406 coefficient + "vmov.s16 q14, #94 \n" // VG -0.7344 coefficient + "vmov.u16 q15, #0x8000 \n" // 128.0 + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. + "subs %4, %4, #16 \n" // 16 processed per loop. + "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. + "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. + "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. + "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. + "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. + "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. + "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. + "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts. + + "vrshr.u16 q0, q0, #2 \n" // average of 4 + "vrshr.u16 q1, q1, #2 \n" + "vrshr.u16 q2, q2, #2 \n" + + RGBTOUV(q0, q1, q2) + "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. + "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
+ "bgt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(src_stride_rgba), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" + ); } void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, @@ -2703,20 +2797,19 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64, // ARGB expects first 3 values to contain RGB and 4th value is ignored. void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { asm volatile( - "vld1.8 {d24}, [%3] \n" // load kRGBToY - "vld1.16 {d25[0]}, [%4] \n" // load kAddY[0] - "vdup.8 d20, d24[0] \n" // B - "vdup.8 d21, d24[1] \n" // G - "vdup.8 d22, d24[2] \n" // R - "vdup.8 d23, d24[3] \n" // A - "vdup.16 q12, d25[0] \n" // bias + "vld1.8 {d16}, [%3] \n" // load kRGBToY + "vld1.16 {d18[0]}, [%4] \n" // load kAddY[0] + "vdup.8 d20, d16[0] \n" // BY + "vdup.8 d21, d16[1] \n" // GY + "vdup.8 d22, d16[2] \n" // RY + "vdup.16 q12, d18[0] \n" // AY "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 pixels + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" "subs %1, %1, #16 \n" // 16 processed per loop. "vmull.u8 q8, d0, d20 \n" // B "vmull.u8 q9, d1, d20 \n" @@ -2724,8 +2817,6 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, "vmlal.u8 q9, d3, d21 \n" "vmlal.u8 q8, d4, d22 \n" // R "vmlal.u8 q9, d5, d22 \n" - "vmlal.u8 q8, d6, d23 \n" // A - "vmlal.u8 q9, d7, d23 \n" "vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y "vaddhn.u16 d1, q9, q12 \n" "vst1.8 {d0, d1}, [%2]! \n" // store 16 pixels Y. 
@@ -2735,8 +2826,8 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, "+r"(dst_y) // %2 : "r"(&c->kRGBToY), // %3 "r"(&c->kAddY) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", - "d24", "d25"); + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22", + "q12"); } void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { @@ -2755,33 +2846,65 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kAbgrJPEGConstants); } +// RGBA expects first value to be A and ignored, then 3 values to contain RGB. +// Same code as ARGB, except the LD4 +static void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { + asm volatile( + "vld1.8 {d16}, [%3] \n" // load kRGBToY + "vld1.16 {d18[0]}, [%4] \n" // load kAddY[0] + "vdup.8 d20, d16[0] \n" // BY + "vdup.8 d21, d16[1] \n" // GY + "vdup.8 d22, d16[2] \n" // RY + "vdup.16 q12, d18[0] \n" // AY + "1: \n" + "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of RGBA + "vld4.8 {d1, d3, d5, d7}, [%0]! \n" + "subs %2, %2, #16 \n" // 16 processed per loop. + "vmull.u8 q8, d2, d20 \n" // B + "vmull.u8 q9, d3, d20 \n" + "vmlal.u8 q8, d4, d21 \n" // G + "vmlal.u8 q9, d5, d21 \n" + "vmlal.u8 q8, d6, d22 \n" // R + "vmlal.u8 q9, d7, d22 \n" + "vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y + "vaddhn.u16 d1, q9, q12 \n" + "vst1.8 {d0, d1}, [%1]! \n" // store 16 pixels Y. 
+ "bgt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(&c->kRGBToY), // %3 + "r"(&c->kAddY) // %4 + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22", + "q12"); +} + void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgbaI601Constants); + RGBAToYMatrixRow_NEON(src_rgba, dst_y, width, &kArgbI601Constants); } void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgbaJPEGConstants); + RGBAToYMatrixRow_NEON(src_rgba, dst_yj, width, &kArgbJPEGConstants); } void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_bgra, dst_y, width, &kBgraI601Constants); -} - -void BGRAToYJRow_NEON(const uint8_t* src_bgra, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_bgra, dst_yj, width, &kBgraJPEGConstants); + RGBAToYMatrixRow_NEON(src_bgra, dst_y, width, &kAbgrI601Constants); } void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { asm volatile( - "vld1.8 {d24}, [%3] \n" // load kRGBToY - "vld1.16 {d25[0]}, [%4] \n" // load kAddY[0] - "vdup.8 d20, d24[0] \n" // BY - "vdup.8 d21, d24[1] \n" // GY - "vdup.8 d22, d24[2] \n" // RY - "vdup.16 q12, d25[0] \n" // AY + "vld1.8 {d16}, [%3] \n" // load kRGBToY + "vld1.16 {d18[0]}, [%4] \n" // load kAddY[0] + "vdup.8 d20, d16[0] \n" // BY + "vdup.8 d21, d16[1] \n" // GY + "vdup.8 d22, d16[2] \n" // RY + "vdup.16 q12, d18[0] \n" // AY "1: \n" "vld3.8 {d2, d4, d6}, [%0]! \n" // load 16 pixels of // RGB24. 
@@ -2802,10 +2925,14 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, "+r"(width) // %2 : "r"(&c->kRGBToY), // %3 "r"(&c->kAddY) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", - "d24", "d25"); + : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22", + "q12"); } + + + + // Bilinear filter 16x2 -> 16x1 void InterpolateRow_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, diff --git a/source/row_neon64.cc b/source/row_neon64.cc index f90b4a18b..19016cc3b 100644 --- a/source/row_neon64.cc +++ b/source/row_neon64.cc @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/convert_from_argb.h" #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" #ifdef __cplusplus namespace libyuv { @@ -292,12 +292,12 @@ void I210ToAR30Row_NEON(const uint16_t* src_y, uint16_t limit = 0x3ff0; uint16_t alpha = 0xc000; asm volatile(YUVTORGB_SETUP - "dup v22.8h, %w[limit] \n" - "dup v23.8h, %w[alpha] \n" - "1: \n" // + "dup v22.8h, %w[limit] \n" + "dup v23.8h, %w[alpha] \n" + "1: \n" // READYUV210 - "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 - "b.gt 1b \n" + "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 + "b.gt 1b \n" : [src_y] "+r"(src_y), // %[src_y] [src_u] "+r"(src_u), // %[src_u] [src_v] "+r"(src_v), // %[src_v] @@ -321,12 +321,12 @@ void I410ToAR30Row_NEON(const uint16_t* src_y, uint16_t limit = 0x3ff0; uint16_t alpha = 0xc000; asm volatile(YUVTORGB_SETUP - "dup v22.8h, %w[limit] \n" - "dup v23.8h, %w[alpha] \n" - "1: \n" // + "dup v22.8h, %w[limit] \n" + "dup v23.8h, %w[alpha] \n" + "1: \n" // READYUV410 - "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 - "b.gt 1b \n" + "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 + "b.gt 1b \n" : [src_y] "+r"(src_y), // %[src_y] [src_u] "+r"(src_u), // %[src_u] [src_v] "+r"(src_v), // %[src_v] @@ -349,12 +349,12 @@ void I212ToAR30Row_NEON(const uint16_t* src_y, const vec16* rgb_coeff = 
&yuvconstants->kRGBCoeffBias; const uint16_t limit = 0x3ff0; asm volatile(YUVTORGB_SETUP - "dup v22.8h, %w[limit] \n" - "movi v23.8h, #0xc0, lsl #8 \n" // A - "1: \n" // + "dup v22.8h, %w[limit] \n" + "movi v23.8h, #0xc0, lsl #8 \n" // A + "1: \n" // READYUV212 - "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 - "b.gt 1b \n" + "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 + "b.gt 1b \n" : [src_y] "+r"(src_y), // %[src_y] [src_u] "+r"(src_u), // %[src_u] [src_v] "+r"(src_v), // %[src_v] @@ -531,13 +531,13 @@ void P210ToAR30Row_NEON(const uint16_t* src_y, const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; const uint16_t limit = 0x3ff0; asm volatile(YUVTORGB_SETUP - "dup v22.8h, %w[limit] \n" - "movi v23.8h, #0xc0, lsl #8 \n" // A - "ldr q2, [%[kIndices]] \n" - "1: \n" // + "dup v22.8h, %w[limit] \n" + "movi v23.8h, #0xc0, lsl #8 \n" // A + "ldr q2, [%[kIndices]] \n" + "1: \n" // READYUVP210 - "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 - "b.gt 1b \n" + "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 + "b.gt 1b \n" : [src_y] "+r"(src_y), // %[src_y] [src_uv] "+r"(src_uv), // %[src_uv] [dst_ar30] "+r"(dst_ar30), // %[dst_ar30] @@ -558,13 +558,13 @@ void P410ToAR30Row_NEON(const uint16_t* src_y, const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; uint16_t limit = 0x3ff0; asm volatile(YUVTORGB_SETUP - "dup v22.8h, %w[limit] \n" - "movi v23.8h, #0xc0, lsl #8 \n" // A - "ldr q2, [%[kIndices]] \n" - "1: \n" // + "dup v22.8h, %w[limit] \n" + "movi v23.8h, #0xc0, lsl #8 \n" // A + "ldr q2, [%[kIndices]] \n" + "1: \n" // READYUVP410 - "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 - "b.gt 1b \n" + "subs %w[width], %w[width], #8 \n" NVTORGB STOREAR30 + "b.gt 1b \n" : [src_y] "+r"(src_y), // %[src_y] [src_uv] "+r"(src_uv), // %[src_uv] [dst_ar30] "+r"(dst_ar30), // %[dst_ar30] @@ -783,8 +783,9 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y, asm volatile( YUVTORGB_SETUP "1: \n" // - READYUV422 "subs %w[width], %w[width], #8 \n" I4XXTORGB 
- RGBTORGB8_TOP ARGBTORGB565_FROM_TOP + READYUV422 + "subs %w[width], %w[width], #8 \n" I4XXTORGB RGBTORGB8_TOP + ARGBTORGB565_FROM_TOP "st1 {v18.8h}, [%[dst_rgb565]], #16 \n" // store 8 pixels RGB565. "b.gt 1b \n" : [src_y] "+r"(src_y), // %[src_y] @@ -1035,8 +1036,9 @@ void NV12ToRGB565Row_NEON(const uint8_t* src_y, YUVTORGB_SETUP "ldr q2, [%[kNV12Table]] \n" "1: \n" // - READNV12 "subs %w[width], %w[width], #8 \n" NVTORGB - RGBTORGB8_TOP ARGBTORGB565_FROM_TOP + READNV12 + "subs %w[width], %w[width], #8 \n" NVTORGB RGBTORGB8_TOP + ARGBTORGB565_FROM_TOP "st1 {v18.8h}, [%[dst_rgb565]], #16 \n" // store 8 // pixels // RGB565. @@ -2734,75 +2736,58 @@ struct RgbUVConstants { }; // 8x1 pixels. -void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { +static void ARGBToUV444MatrixRow_NEON( + const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct RgbUVConstants* rgbuvconstants) { asm volatile( - "ldr q16, [%[c], #16] \n" // kRGBToU - "ldr q17, [%[c], #32] \n" // kRGBToV - "ldr s0, [%[c], #64] \n" // kAddUV - "sxtl v16.8h, v16.8b \n" // sign extend U coeffs - // to 16-bit - "sxtl v17.8h, v17.8b \n" // sign extend V coeffs - // to 16-bit - "dup v20.8h, v16.h[0] \n" // U0 - "dup v21.8h, v16.h[1] \n" // U1 - "dup v22.8h, v16.h[2] \n" // U2 - "dup v23.8h, v16.h[3] \n" // U3 - "dup v24.8h, v17.h[0] \n" // V0 - "dup v26.8h, v17.h[1] \n" // V1 - "dup v27.8h, v17.h[2] \n" // V2 - "dup v28.8h, v17.h[3] \n" // V3 - "dup v25.8h, v0.h[0] \n" // kAddUV + "ldr d0, [%4] \n" // load rgbuvconstants + "dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient + "dup v25.16b, v0.b[1] \n" // UG -0.5781 coefficient + "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient + "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient + "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient + "neg v24.16b, v24.16b \n" + "movi v29.8h, #0x80, lsl #8 \n" // 128.0 + "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, 
[%0], #32 \n" // load 8 ARGB "subs %w3, %w3, #8 \n" // 8 processed per loop. + "umull v4.8h, v0.8b, v24.8b \n" // B + "umlsl v4.8h, v1.8b, v25.8b \n" // G + "umlsl v4.8h, v2.8b, v26.8b \n" // R + "prfm pldl1keep, [%0, 448] \n" - "uxtl v4.8h, v0.8b \n" - "uxtl v5.8h, v1.8b \n" - "uxtl v6.8h, v2.8b \n" - "uxtl v7.8h, v3.8b \n" + "umull v3.8h, v2.8b, v24.8b \n" // R + "umlsl v3.8h, v1.8b, v28.8b \n" // G + "umlsl v3.8h, v0.8b, v27.8b \n" // B - // U = B*U0 + G*U1 + R*U2 + A*U3 - "mul v18.8h, v4.8h, v20.8h \n" - "mla v18.8h, v5.8h, v21.8h \n" - "mla v18.8h, v6.8h, v22.8h \n" - "mla v18.8h, v7.8h, v23.8h \n" + "addhn v0.8b, v4.8h, v29.8h \n" // signed -> unsigned + "addhn v1.8b, v3.8h, v29.8h \n" - // V = B*V0 + G*V1 + R*V2 + A*V3 - "mul v19.8h, v4.8h, v24.8h \n" - "mla v19.8h, v5.8h, v26.8h \n" - "mla v19.8h, v6.8h, v27.8h \n" - "mla v19.8h, v7.8h, v28.8h \n" - - "subhn v0.8b, v25.8h, v18.8h \n" - "subhn v1.8b, v25.8h, v19.8h \n" - - "st1 {v0.8b}, [%1], #8 \n" - "st1 {v1.8b}, [%2], #8 \n" + "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 
"b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : [c] "r"(c) // %4 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", - "v27", "v28"); + : "+r"(src_argb), // %0 + "+r"(dst_u), // %1 + "+r"(dst_v), // %2 + "+r"(width) // %3 + : "r"(rgbuvconstants) // %4 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26", + "v27", "v28", "v29"); } -static void ARGBToUV444MatrixRow_NEON_I8MM(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { +static void ARGBToUV444MatrixRow_NEON_I8MM( + const uint8_t* src_argb, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const struct RgbUVConstants* rgbuvconstants) { asm volatile( - "ldr q16, [%[c], #16] \n" // kRGBToU - "ldr q17, [%[c], #32] \n" // kRGBToV - "ldr s0, [%[c], #64] \n" // kAddUV - "dup v29.8h, v0.h[0] \n" // 128.0 + "ld2r {v16.4s, v17.4s}, [%[rgbuvconstants]] \n" + "movi v29.8h, #0x80, lsl #8 \n" // 128.0 "1: \n" "ldp q0, q1, [%[src]], #32 \n" "subs %w[width], %w[width], #8 \n" // 8 processed per loop. @@ -2822,11 +2807,11 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(const uint8_t* src_argb, "str d0, [%[dst_u]], #8 \n" // store 8 pixels U. "str d1, [%[dst_v]], #8 \n" // store 8 pixels V. 
"b.gt 1b \n" - : [src] "+r"(src_argb), // %[src] - [dst_u] "+r"(dst_u), // %[dst_u] - [dst_v] "+r"(dst_v), // %[dst_v] - [width] "+r"(width) // %[width] - : [c] "r"(c) // %[c] + : [src] "+r"(src_argb), // %[src] + [dst_u] "+r"(dst_u), // %[dst_u] + [dst_v] "+r"(dst_v), // %[dst_v] + [width] "+r"(width) // %[width] + : [rgbuvconstants] "r"(rgbuvconstants) // %[rgbuvconstants] : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v29"); } @@ -2839,11 +2824,15 @@ static void ARGBToUV444MatrixRow_NEON_I8MM(const uint8_t* src_argb, // VG -0.7344 coefficient = -94 // VR 0.875 coefficient = 112 +static const struct RgbUVConstants kARGBI601UVConstants = {{-112, 74, 38, 0}, + {18, 94, -112, 0}}; + void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, &kArgbI601Constants); + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kARGBI601UVConstants); } void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb, @@ -2851,14 +2840,26 @@ void ARGBToUV444Row_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_v, int width) { ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width, - &kArgbI601Constants); + &kARGBI601UVConstants); } +// RGB to JPEG coefficients +// UB 0.500 coefficient = 128 +// UG -0.33126 coefficient = -85 +// UR -0.16874 coefficient = -43 +// VB -0.08131 coefficient = -21 +// VG -0.41869 coefficient = -107 +// VR 0.500 coefficient = 128 + +static const struct RgbUVConstants kARGBJPEGUVConstants = {{-128, 85, 43, 0}, + {21, 107, -128, 0}}; + void ARGBToUVJ444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants); + ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, + &kARGBJPEGUVConstants); } void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb, @@ -2866,7 +2867,7 @@ void ARGBToUVJ444Row_NEON_I8MM(const uint8_t* src_argb, uint8_t* 
dst_v, int width) { ARGBToUV444MatrixRow_NEON_I8MM(src_argb, dst_u, dst_v, width, - &kArgbJPEGConstants); + &kARGBJPEGUVConstants); } #define RGBTOUV_SETUP_REG \ @@ -2900,75 +2901,63 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb, int width, const struct ArgbConstants* c) { const uint8_t* src_argb_1 = src_argb + src_stride_argb; - asm volatile( - "ldr q16, [%[c], #16] \n" // kRGBToU - "ldr q17, [%[c], #32] \n" // kRGBToV - "sxtl v16.8h, v16.8b \n" // sign extend U coeffs - // to 16-bit - "sxtl v17.8h, v17.8b \n" // sign extend V coeffs - // to 16-bit - "dup v20.8h, v16.h[0] \n" // U0 - "dup v21.8h, v16.h[1] \n" // U1 - "dup v22.8h, v16.h[2] \n" // U2 - "dup v23.8h, v16.h[3] \n" // U3 - "dup v24.8h, v17.h[0] \n" // V0 - "dup v26.8h, v17.h[1] \n" // V1 - "dup v27.8h, v17.h[2] \n" // V2 - "dup v28.8h, v17.h[3] \n" // V3 - "movi v25.8h, #0x80, lsl #8 \n" // 128.0 in 16-bit - // (0x8000) + asm volatile ( + "ldr q16, [%[c], #16] \n" // kRGBToU + "ldr q17, [%[c], #32] \n" // kRGBToV + "sxtl v16.8h, v16.8b \n" // sign extend U coeffs to 16-bit + "sxtl v17.8h, v17.8b \n" // sign extend V coeffs to 16-bit + "dup v20.8h, v16.h[0] \n" // U0 (-BU) + "dup v21.8h, v16.h[1] \n" // U1 (-GU) + "dup v22.8h, v16.h[2] \n" // U2 (-RU) + "dup v23.8h, v17.h[0] \n" // V0 (-BV) + "dup v24.8h, v17.h[1] \n" // V1 (-GV) + "dup v26.8h, v17.h[2] \n" // V2 (-RV) + "movi v25.8h, #0x80, lsl #8 \n" // 128.0 in 16-bit (0x8000) "1: \n" - "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 - // pixels. + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "subs %w4, %w4, #16 \n" // 16 processed per loop. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "prfm pldl1keep, [%0, 448] \n" "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. - "uaddlp v18.8h, v3.16b \n" // A 16 bytes -> 8 shorts. - "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 - // more. 
+ "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "prfm pldl1keep, [%1, 448] \n" "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. - "uadalp v18.8h, v7.16b \n" // A 16 bytes -> 8 shorts. "urshr v0.8h, v0.8h, #2 \n" // average of 4 "urshr v1.8h, v1.8h, #2 \n" "urshr v2.8h, v2.8h, #2 \n" - "urshr v18.8h, v18.8h, #2 \n" - // U = B*U0 + G*U1 + R*U2 + A*U3 - "mul v3.8h, v0.8h, v20.8h \n" - "mla v3.8h, v1.8h, v21.8h \n" - "mla v3.8h, v2.8h, v22.8h \n" - "mla v3.8h, v18.8h, v23.8h \n" + // U = B*U0 + G*U1 + R*U2 + "mul v3.8h, v0.8h, v20.8h \n" + "mla v3.8h, v1.8h, v21.8h \n" + "mla v3.8h, v2.8h, v22.8h \n" - // V = B*V0 + G*V1 + R*V2 + A*V3 - "mul v4.8h, v0.8h, v24.8h \n" - "mla v4.8h, v1.8h, v26.8h \n" - "mla v4.8h, v2.8h, v27.8h \n" - "mla v4.8h, v18.8h, v28.8h \n" + // V = B*V0 + G*V1 + R*V2 + "mul v4.8h, v0.8h, v23.8h \n" + "mla v4.8h, v1.8h, v24.8h \n" + "mla v4.8h, v2.8h, v26.8h \n" // U = (128.0 - U) >> 8, V = (128.0 - V) >> 8 - "subhn v0.8b, v25.8h, v3.8h \n" - "subhn v1.8b, v25.8h, v4.8h \n" + "subhn v0.8b, v25.8h, v3.8h \n" + "subhn v1.8b, v25.8h, v4.8h \n" "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
"b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_argb_1), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(width) // %4 - : [c] "r"(c) // %5 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17", "v18", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", - "v28"); + : "+r"(src_argb), // %0 + "+r"(src_argb_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : [c] "r"(c) // %5 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v16", "v17", "v20", "v21", "v22", "v23", "v24", "v25", "v26" + ); } void ARGBToUVRow_NEON(const uint8_t* src_argb, @@ -2985,35 +2974,44 @@ void ARGBToUVJRow_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON(src_argb, src_stride_argb, dst_u, dst_v, width, - &kArgbJPEGConstants); -} + const uint8_t* src_argb_1 = src_argb + src_stride_argb; + asm volatile ( + "movi v20.8h, #128 \n" // UB/VR coeff (0.500) + "movi v21.8h, #85 \n" // UG coeff (-0.33126) + "movi v22.8h, #43 \n" // UR coeff (-0.16874) + "movi v23.8h, #21 \n" // VB coeff (-0.08131) + "movi v24.8h, #107 \n" // VG coeff (-0.41869) + "movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit) + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%0, 448] \n" + "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 + "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%1, 448] \n" + "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. 
-void ABGRToUVRow_NEON(const uint8_t* src_abgr, - int src_stride_abgr, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width, - &kAbgrI601Constants); -} + "urshr v0.8h, v0.8h, #2 \n" // average of 4 + "urshr v1.8h, v1.8h, #2 \n" + "urshr v2.8h, v2.8h, #2 \n" -void BGRAToUVRow_NEON(const uint8_t* src_bgra, - int src_stride_bgra, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width, - &kBgraI601Constants); -} - -void RGBAToUVRow_NEON(const uint8_t* src_rgba, - int src_stride_rgba, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width, - &kRgbaI601Constants); + RGBTOUV(v0.8h, v1.8h, v2.8h) + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_argb), // %0 + "+r"(src_argb_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v20", "v21", "v22", "v23", "v24", "v25" + ); } void ABGRToUVJRow_NEON(const uint8_t* src_abgr, @@ -3021,8 +3019,44 @@ void ABGRToUVJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_uj, uint8_t* dst_vj, int width) { - ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_uj, dst_vj, width, - &kAbgrJPEGConstants); + const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; + asm volatile ( + "movi v20.8h, #128 \n" // UB/VR coeff (0.500) + "movi v21.8h, #85 \n" // UG coeff (-0.33126) + "movi v22.8h, #43 \n" // UR coeff (-0.16874) + "movi v23.8h, #21 \n" // VB coeff (-0.08131) + "movi v24.8h, #107 \n" // VG coeff (-0.41869) + "movi v25.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in 16-bit) + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts. 
+ "prfm pldl1keep, [%0, 448] \n" + "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 + "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts. + "prfm pldl1keep, [%1, 448] \n" + "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts. + + "urshr v0.8h, v0.8h, #2 \n" // average of 4 + "urshr v1.8h, v1.8h, #2 \n" + "urshr v2.8h, v2.8h, #2 \n" + + RGBTOUV(v2.8h, v1.8h, v0.8h) + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(src_abgr_1), // %1 + "+r"(dst_uj), // %2 + "+r"(dst_vj), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v20", "v21", "v22", "v23", "v24", "v25" + ); } void RGB24ToUVJRow_NEON(const uint8_t* src_rgb24, @@ -3115,6 +3149,126 @@ void RAWToUVJRow_NEON(const uint8_t* src_raw, ); } +void BGRAToUVRow_NEON(const uint8_t* src_bgra, + int src_stride_bgra, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra; + asm volatile ( + RGBTOUV_SETUP_REG + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%0, 448] \n" + "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more + "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%1, 448] \n" + "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts. 
+ + "urshr v0.8h, v0.8h, #2 \n" // average of 4 + "urshr v1.8h, v3.8h, #2 \n" + "urshr v2.8h, v2.8h, #2 \n" + + RGBTOUV(v0.8h, v1.8h, v2.8h) + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_bgra), // %0 + "+r"(src_bgra_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v20", "v21", "v22", "v23", "v24", "v25" + ); +} + +void ABGRToUVRow_NEON(const uint8_t* src_abgr, + int src_stride_abgr, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; + asm volatile ( + RGBTOUV_SETUP_REG + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%0, 448] \n" + "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. + "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%1, 448] \n" + "uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts. + + "urshr v0.8h, v3.8h, #2 \n" // average of 4 + "urshr v2.8h, v2.8h, #2 \n" + "urshr v1.8h, v1.8h, #2 \n" + + RGBTOUV(v0.8h, v2.8h, v1.8h) + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. 
+ "b.gt 1b \n" + : "+r"(src_abgr), // %0 + "+r"(src_abgr_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v20", "v21", "v22", "v23", "v24", "v25" + ); +} + +void RGBAToUVRow_NEON(const uint8_t* src_rgba, + int src_stride_rgba, + uint8_t* dst_u, + uint8_t* dst_v, + int width) { + const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba; + asm volatile ( + RGBTOUV_SETUP_REG + "1: \n" + "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. + "subs %w4, %w4, #16 \n" // 16 processed per loop. + "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%0, 448] \n" + "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts. + "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts. + "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. + "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts. + "prfm pldl1keep, [%1, 448] \n" + "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts. + "uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts. + + "urshr v0.8h, v0.8h, #2 \n" // average of 4 + "urshr v1.8h, v1.8h, #2 \n" + "urshr v2.8h, v2.8h, #2 \n" + + RGBTOUV(v0.8h, v1.8h, v2.8h) + "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. + "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. + "b.gt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(src_rgba_1), // %1 + "+r"(dst_u), // %2 + "+r"(dst_v), // %3 + "+r"(width) // %4 + : + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v20", "v21", "v22", "v23", "v24", "v25" + ); +} + void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, @@ -3329,19 +3483,18 @@ void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, ); } -// Process any of ARGB, ABGR, BGRA, RGBA, by adjusting the ArgbConstants layout. +// Process any of ARGB, ABGR, BGRA, RGBA, by adjusting the uvconstants layout. 
static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src, - int src_stride, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { + int src_stride, + uint8_t* dst_u, + uint8_t* dst_v, + int width, + const int8_t* uvconstants) { const uint8_t* src1 = src + src_stride; asm volatile( "movi v23.8h, #0x80, lsl #8 \n" // 128.0 (0x8000 in // 16-bit) - "ldr q24, [%[c], #16] \n" // kRGBToU - "ldr q25, [%[c], #32] \n" // kRGBToV + "ld2r {v24.4s, v25.4s}, [%[uvconstants]] \n" "1: \n" "ld2 {v0.4s, v1.4s}, [%[src]], #32 \n" // load 8 pixels @@ -3389,24 +3542,56 @@ static void ARGBToUVMatrixRow_NEON_I8MM_Impl(const uint8_t* src, "str d0, [%[dst_u]], #8 \n" // store 8 pixels U "str d1, [%[dst_v]], #8 \n" // store 8 pixels V "b.gt 1b \n" - : [src] "+r"(src), // %[src] - [src1] "+r"(src1), // %[src1] - [dst_u] "+r"(dst_u), // %[dst_u] - [dst_v] "+r"(dst_v), // %[dst_v] - [width] "+r"(width) // %[width] - : [c] "r"(c) // %[c] + : [src] "+r"(src), // %[src] + [src1] "+r"(src1), // %[src1] + [dst_u] "+r"(dst_u), // %[dst_u] + [dst_v] "+r"(dst_v), // %[dst_v] + [width] "+r"(width) // %[width] + : [uvconstants] "r"(uvconstants) // %[uvconstants] : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v23", "v24", "v25"); } +// RGB to BT601 coefficients +// UB 0.875 coefficient = 112 +// UG -0.5781 coefficient = -74 +// UR -0.2969 coefficient = -38 +// VB -0.1406 coefficient = -18 +// VG -0.7344 coefficient = -94 +// VR 0.875 coefficient = 112 +// I8MM constants are stored negated such that we can store 128 in int8_t. 
+ +static const int8_t kARGBToUVCoefficients[] = { + // -UB, -UG, -UR, 0, -VB, -VG, -VR, 0 + -112, 74, 38, 0, 18, 94, -112, 0, +}; + +static const int8_t kABGRToUVCoefficients[] = { + // -UR, -UG, -UB, 0, -VR, -VG, -VB, 0 + 38, 74, -112, 0, -112, 94, 18, 0, +}; + +static const int8_t kBGRAToUVCoefficients[] = { + // 0, -UR, -UG, -UB, 0, -VR, -VG, -VB + 0, 38, 74, -112, 0, -112, 94, 18, +}; + +static const int8_t kRGBAToUVCoefficients[] = { + // 0, -UB, -UG, -UR, 0, -VB, -VG, -VR + 0, -112, 74, 38, 0, 18, 94, -112, +}; + void ARGBToUVMatrixRow_NEON_I8MM(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width, const struct ArgbConstants* c) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, - width, c); + int8_t uvconstants[8] = { + (int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3], + (int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]}; + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width, + uvconstants); } void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb, @@ -3414,8 +3599,8 @@ void ARGBToUVRow_NEON_I8MM(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, - width, &kArgbI601Constants); + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width, + kARGBToUVCoefficients); } void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr, @@ -3423,8 +3608,8 @@ void ABGRToUVRow_NEON_I8MM(const uint8_t* src_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, - width, &kAbgrI601Constants); + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, width, + kABGRToUVCoefficients); } void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra, @@ -3432,8 +3617,8 @@ void BGRAToUVRow_NEON_I8MM(const uint8_t* src_bgra, 
uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_bgra, src_stride_bgra, dst_u, dst_v, - width, &kBgraI601Constants); + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_bgra, src_stride_bgra, dst_u, dst_v, width, + kBGRAToUVCoefficients); } void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba, @@ -3441,17 +3626,36 @@ void RGBAToUVRow_NEON_I8MM(const uint8_t* src_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_rgba, src_stride_rgba, dst_u, dst_v, - width, &kRgbaI601Constants); + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_rgba, src_stride_rgba, dst_u, dst_v, width, + kRGBAToUVCoefficients); } +// RGB to JPEG coefficients +// UB 0.500 coefficient = 128 +// UG -0.33126 coefficient = -85 +// UR -0.16874 coefficient = -43 +// VB -0.08131 coefficient = -21 +// VG -0.41869 coefficient = -107 +// VR 0.500 coefficient = 128 +// I8MM constants are stored negated such that we can store 128 in int8_t. + +static const int8_t kARGBToUVJCoefficients[] = { + // -UB, -UG, -UR, 0, -VB, -VG, -VR, 0 + -128, 85, 43, 0, 21, 107, -128, 0, +}; + +static const int8_t kABGRToUVJCoefficients[] = { + // -UR, -UG, -UB, 0, -VR, -VG, -VB, 0 + 43, 85, -128, 0, -128, 107, 21, 0, +}; + void ARGBToUVJRow_NEON_I8MM(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, - width, &kArgbJPEGConstants); + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_argb, src_stride_argb, dst_u, dst_v, width, + kARGBToUVJCoefficients); } void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr, @@ -3459,8 +3663,8 @@ void ABGRToUVJRow_NEON_I8MM(const uint8_t* src_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { - ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, - width, &kAbgrJPEGConstants); + ARGBToUVMatrixRow_NEON_I8MM_Impl(src_abgr, src_stride_abgr, dst_u, dst_v, width, + kABGRToUVJCoefficients); } void RGB565ToYRow_NEON(const uint8_t* 
src_rgb565, uint8_t* dst_y, int width) { @@ -3559,184 +3763,251 @@ void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27"); } + + // ARGB expects first 3 values to contain RGB and 4th value is ignored. void ARGBToYMatrixRow_NEON(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { asm volatile( - "ldr s16, [%3] \n" // load 4 coeffs - "ldr s17, [%3, #48] \n" // load kAddY[0] - "dup v18.16b, v16.b[0] \n" // B - "dup v19.16b, v16.b[1] \n" // G - "dup v20.16b, v16.b[2] \n" // R - "dup v21.16b, v16.b[3] \n" // A - "dup v22.8h, v17.h[0] \n" // bias + "ldr s0, [%3] \n" // load rgbconstants + "ldr s1, [%3, #48] \n" + "dup v6.16b, v0.b[0] \n" + "dup v7.16b, v0.b[1] \n" + "dup v16.16b, v0.b[2] \n" + "dup v17.8h, v1.h[0] \n" "1: \n" "ld4 {v2.16b,v3.16b,v4.16b,v5.16b}, [%0], #64 \n" // load 16 + // pixels. "subs %w2, %w2, #16 \n" // 16 processed per loop. - "umull v0.8h, v2.8b, v18.8b \n" // B - "umull2 v1.8h, v2.16b, v18.16b \n" + "umull v0.8h, v2.8b, v6.8b \n" // B + "umull2 v1.8h, v2.16b, v6.16b \n" "prfm pldl1keep, [%0, 448] \n" - "umlal v0.8h, v3.8b, v19.8b \n" // G - "umlal2 v1.8h, v3.16b, v19.16b \n" - "umlal v0.8h, v4.8b, v20.8b \n" // R - "umlal2 v1.8h, v4.16b, v20.16b \n" - "umlal v0.8h, v5.8b, v21.8b \n" // A - "umlal2 v1.8h, v5.16b, v21.16b \n" - "addhn v0.8b, v0.8h, v22.8h \n" // 16 bit to 8 bit Y - "addhn v1.8b, v1.8h, v22.8h \n" + "umlal v0.8h, v3.8b, v7.8b \n" // G + "umlal2 v1.8h, v3.16b, v7.16b \n" + "umlal v0.8h, v4.8b, v16.8b \n" // R + "umlal2 v1.8h, v4.16b, v16.16b \n" + "addhn v0.8b, v0.8h, v17.8h \n" // 16 bit to 8 bit Y + "addhn v1.8b, v1.8h, v17.8h \n" "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. 
"b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "r"(c) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", - "v19", "v20", "v21", "v22"); + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(c) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17"); } -void ARGBToYMatrixRow_NEON_DotProd(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { +void ARGBToYMatrixRow_NEON_DotProd( + const uint8_t* src_argb, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { asm volatile( - "ldr s16, [%3] \n" // load 4 coeffs - "ldr s17, [%3, #48] \n" // load kAddY[0] - "dup v18.4s, v16.s[0] \n" - "dup v19.8h, v17.h[0] \n" + "ldr s0, [%3] \n" // load rgbconstants + "ldr s1, [%3, #48] \n" + "dup v16.4s, v0.s[0] \n" + "dup v17.8h, v1.h[0] \n" "1: \n" "ld1 {v4.16b, v5.16b, v6.16b, v7.16b}, [%0], #64 \n" // load 16 + // pixels. "subs %w2, %w2, #16 \n" // 16 processed per loop. "movi v0.16b, #0 \n" "movi v1.16b, #0 \n" "movi v2.16b, #0 \n" "movi v3.16b, #0 \n" - "udot v0.4s, v4.16b, v18.16b \n" - "udot v1.4s, v5.16b, v18.16b \n" - "udot v2.4s, v6.16b, v18.16b \n" - "udot v3.4s, v7.16b, v18.16b \n" + "udot v0.4s, v4.16b, v16.16b \n" + "udot v1.4s, v5.16b, v16.16b \n" + "udot v2.4s, v6.16b, v16.16b \n" + "udot v3.4s, v7.16b, v16.16b \n" "uzp1 v0.8h, v0.8h, v1.8h \n" "uzp1 v1.8h, v2.8h, v3.8h \n" - "addhn v0.8b, v0.8h, v19.8h \n" - "addhn v1.8b, v1.8h, v19.8h \n" + "addhn v0.8b, v0.8h, v17.8h \n" + "addhn v1.8b, v1.8h, v17.8h \n" "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. 
"b.gt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "r"(c) // %3 + : "+r"(src_argb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(c) // %3 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", - "v17", "v18", "v19"); + "v17"); } // RGB to JPeg coefficients +// B * 0.1140 coefficient = 29 +// G * 0.5870 coefficient = 150 +// R * 0.2990 coefficient = 77 +// Add 0.5 +static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, {}, {}, {0x0080}, {}}; +static const struct ArgbConstants kRgb24JPEGDotProdConstants = {{0, 29, 150, 77}, {}, {}, {0x0080}, {}}; + +static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, {}, {}, {0x0080}, {}}; + +// RGB to BT.601 coefficients +// B * 0.1016 coefficient = 25 +// G * 0.5078 coefficient = 129 +// R * 0.2578 coefficient = 66 +// Add 16.5 = 0x1080 + +static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, {}, {}, {0x1080}, {}}; +static const struct ArgbConstants kRgb24I601DotProdConstants = {{0, 25, 129, 66}, {}, {}, {0x1080}, {}}; + +static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, {}, {}, {0x1080}, {}}; +static const struct ArgbConstants kRawI601DotProdConstants = {{0, 66, 129, 25}, {}, {}, {0x1080}, {}}; void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kArgbI601Constants); + ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants); } void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_argb, dst_yj, width, &kArgbJPEGConstants); + ARGBToYMatrixRow_NEON(src_argb, dst_yj, width, &kRgb24JPEGConstants); } void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kAbgrI601Constants); + ARGBToYMatrixRow_NEON(src_abgr, dst_y, width, &kRawI601Constants); } void ABGRToYJRow_NEON(const uint8_t* src_abgr, 
uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kAbgrJPEGConstants); + ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kRawJPEGConstants); } void ARGBToYRow_NEON_DotProd(const uint8_t* src_argb, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_y, width, &kArgbI601Constants); + ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_y, width, &kRgb24I601Constants); } void ARGBToYJRow_NEON_DotProd(const uint8_t* src_argb, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_yj, width, &kArgbJPEGConstants); + ARGBToYMatrixRow_NEON_DotProd(src_argb, dst_yj, width, &kRgb24JPEGConstants); } void ABGRToYRow_NEON_DotProd(const uint8_t* src_abgr, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_y, width, &kAbgrI601Constants); + ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_y, width, &kRawI601Constants); } void ABGRToYJRow_NEON_DotProd(const uint8_t* src_abgr, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_yj, width, &kAbgrJPEGConstants); + ARGBToYMatrixRow_NEON_DotProd(src_abgr, dst_yj, width, &kRawJPEGConstants); } // RGBA expects first value to be A and ignored, then 3 values to contain RGB. +// Same code as ARGB, except the LD4 +static void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba, + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { + asm volatile( + "ldr s0, [%3] \n" // load rgbconstants + "ldr s1, [%3, #48] \n" + "dup v6.16b, v0.b[0] \n" + "dup v7.16b, v0.b[1] \n" + "dup v16.16b, v0.b[2] \n" + "dup v17.8h, v1.h[0] \n" + "1: \n" + "ld4 {v1.16b,v2.16b,v3.16b,v4.16b}, [%0], #64 \n" // load 16 + // pixels. + "subs %w2, %w2, #16 \n" // 16 processed per loop. 
+ "umull v0.8h, v2.8b, v6.8b \n" // B + "umull2 v1.8h, v2.16b, v6.16b \n" + "prfm pldl1keep, [%0, 448] \n" + "umlal v0.8h, v3.8b, v7.8b \n" // G + "umlal2 v1.8h, v3.16b, v7.16b \n" + "umlal v0.8h, v4.8b, v16.8b \n" // R + "umlal2 v1.8h, v4.16b, v16.16b \n" + "addhn v0.8b, v0.8h, v17.8h \n" // 16 bit to 8 bit Y + "addhn v1.8b, v1.8h, v17.8h \n" + "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. + "b.gt 1b \n" + : "+r"(src_rgba), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(c) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", + "v17"); +} void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgbaI601Constants); + RGBAToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgb24I601Constants); } void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgbaJPEGConstants); + RGBAToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgb24JPEGConstants); } void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON(src_bgra, dst_y, width, &kBgraI601Constants); + RGBAToYMatrixRow_NEON(src_bgra, dst_y, width, &kRawI601Constants); } void RGBAToYRow_NEON_DotProd(const uint8_t* src_rgba, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_y, width, &kRgbaI601Constants); + // No need for a separate implementation for RGBA inputs, just permute the + // RGB constants. + ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_y, width, + &kRgb24I601DotProdConstants); } void RGBAToYJRow_NEON_DotProd(const uint8_t* src_rgba, uint8_t* dst_yj, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_yj, width, &kRgbaJPEGConstants); + // No need for a separate implementation for RGBA inputs, just permute the + // RGB constants. 
+ ARGBToYMatrixRow_NEON_DotProd(src_rgba, dst_yj, width, + &kRgb24JPEGDotProdConstants); } void BGRAToYRow_NEON_DotProd(const uint8_t* src_bgra, uint8_t* dst_y, int width) { - ARGBToYMatrixRow_NEON_DotProd(src_bgra, dst_y, width, &kBgraI601Constants); + // No need for a separate implementation for RGBA inputs, just permute the + // RGB constants. + ARGBToYMatrixRow_NEON_DotProd(src_bgra, dst_y, width, + &kRawI601DotProdConstants); } void RGBToYMatrixRow_NEON(const uint8_t* src_rgb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { asm volatile( - "ldr s16, [%3] \n" // load 4 coeffs - "ldr s17, [%3, #48] \n" // load kAddY[0] - "dup v18.16b, v16.b[0] \n" // B - "dup v19.16b, v16.b[1] \n" // G - "dup v20.16b, v16.b[2] \n" // R - "dup v21.8h, v17.h[0] \n" // bias + "ldr d0, [%3] \n" // load rgbconstants + "dup v5.16b, v0.b[0] \n" + "dup v6.16b, v0.b[1] \n" + "dup v7.16b, v0.b[2] \n" + "dup v16.8h, v0.h[2] \n" "1: \n" "ld3 {v2.16b,v3.16b,v4.16b}, [%0], #48 \n" // load 16 pixels. "subs %w2, %w2, #16 \n" // 16 processed per loop. - "umull v0.8h, v2.8b, v18.8b \n" // B - "umull2 v1.8h, v2.16b, v18.16b \n" + "umull v0.8h, v2.8b, v5.8b \n" // B + "umull2 v1.8h, v2.16b, v5.16b \n" "prfm pldl1keep, [%0, 448] \n" - "umlal v0.8h, v3.8b, v19.8b \n" // G - "umlal2 v1.8h, v3.16b, v19.16b \n" - "umlal v0.8h, v4.8b, v20.8b \n" // R - "umlal2 v1.8h, v4.16b, v20.16b \n" - "addhn v0.8b, v0.8h, v21.8h \n" // 16 bit to 8 bit Y - "addhn v1.8b, v1.8h, v21.8h \n" + "umlal v0.8h, v3.8b, v6.8b \n" // G + "umlal2 v1.8h, v3.16b, v6.16b \n" + "umlal v0.8h, v4.8b, v7.8b \n" // R + "umlal2 v1.8h, v4.16b, v7.16b \n" + "addhn v0.8b, v0.8h, v16.8h \n" // 16 bit to 8 bit Y + "addhn v1.8b, v1.8h, v16.8h \n" "st1 {v0.8b, v1.8b}, [%1], #16 \n" // store 16 pixels Y. 
"b.gt 1b \n" - : "+r"(src_rgb), // %0 - "+r"(dst_y), // %1 - "+r"(width) // %2 - : "r"(c) // %3 - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v16", "v17", "v18", - "v19", "v20", "v21"); + : "+r"(src_rgb), // %0 + "+r"(dst_y), // %1 + "+r"(width) // %2 + : "r"(c) // %3 + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } + + + + // Bilinear filter 16x2 -> 16x1 void InterpolateRow_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, @@ -4744,10 +5015,10 @@ void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16 "str s2, [%1], #4 \n" // store 1 floats "b.gt 2b \n" "3: \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((ptrdiff_t)src_stride * 2) // %3 + : "+r"(src), // %0 + "+r"(dst), // %1 + "+r"(width) // %2 + : "r"((ptrdiff_t)(src_stride * 2)) // %3 : "cc", "memory", "v0", "v1", "v2", "v3"); } diff --git a/source/row_rvv.cc b/source/row_rvv.cc index 91752ed16..93bc431bc 100644 --- a/source/row_rvv.cc +++ b/source/row_rvv.cc @@ -1249,22 +1249,16 @@ void MergeUVRow_RVV(const uint8_t* src_u, } #endif + + // RGB to JPeg coefficients // B * 0.1140 coefficient = 29 // G * 0.5870 coefficient = 150 // R * 0.2990 coefficient = 77 // Add 0.5 = 0x80 -static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, - {0}, - {0}, - {128}, - {0}}; +static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, {0}, {0}, {128}, {0}}; -static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, - {0}, - {0}, - {128}, - {0}}; +static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, {0}, {0}, {128}, {0}}; // RGB to BT.601 coefficients // B * 0.1016 coefficient = 25 @@ -1272,24 +1266,16 @@ static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, // R * 0.2578 coefficient = 66 // Add 16.5 = 0x1080 -static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, - {0}, - {0}, - {0x1080}, - {0}}; +static const struct ArgbConstants kRgb24I601Constants = {{25, 
129, 66, 0}, {0}, {0}, {0x1080}, {0}}; -static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, - {0}, - {0}, - {0x1080}, - {0}}; +static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, {0}, {0}, {0x1080}, {0}}; // ARGB expects first 3 values to contain RGB and 4th value is ignored #ifdef HAS_ARGBTOYMATRIXROW_RVV void ARGBToYMatrixRow_RVV(const uint8_t* src_argb, - uint8_t* dst_y, - int width, - const struct ArgbConstants* c) { + uint8_t* dst_y, + int width, + const struct ArgbConstants* c) { assert(width != 0); size_t w = (size_t)width; vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant diff --git a/source/row_sme.cc b/source/row_sme.cc index 2291562e2..fca536dc4 100644 --- a/source/row_sme.cc +++ b/source/row_sme.cc @@ -1127,10 +1127,9 @@ __arm_locally_streaming void ARGBToUVMatrixRow_SME( uint8_t* dst_v, int width, const struct ArgbConstants* c) { - int8_t uvconstants[8] = {(int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], - (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3], - (int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], - (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]}; + int8_t uvconstants[8] = { + (int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3], + (int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]}; ARGBToUVMatrixRow_SVE_SC(src_argb, src_stride_argb, dst_u, dst_v, width, uvconstants); } diff --git a/source/row_sve.cc b/source/row_sve.cc index 662685882..7d8734921 100644 --- a/source/row_sve.cc +++ b/source/row_sve.cc @@ -223,10 +223,9 @@ void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb, uint8_t* dst_v, int width, const struct ArgbConstants* c) { - int8_t uvconstants[8] = {(int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], - (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3], - (int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], - (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]}; + int8_t uvconstants[8] = { + (int8_t)c->kRGBToU[0], 
(int8_t)c->kRGBToU[1], (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3], + (int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]}; ARGBToUVMatrixRow_SVE_SC(src_argb, src_stride_argb, dst_u, dst_v, width, uvconstants); } diff --git a/source/row_win.cc b/source/row_win.cc index a7ed75199..77070d031 100644 --- a/source/row_win.cc +++ b/source/row_win.cc @@ -8,19 +8,19 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "libyuv/convert_from_argb.h" // For ArgbConstants #include "libyuv/row.h" +#include "libyuv/convert_from_argb.h" // For ArgbConstants // This module is for Visual C 32/64 bit -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || \ - defined(_M_X86)) && \ - ((defined(_MSC_VER) && !defined(__clang__)) || \ +#if !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__) || \ + defined(_M_X64) || defined(_M_X86)) && \ + ((defined(_MSC_VER) && !defined(__clang__)) || \ defined(LIBYUV_ENABLE_ROWWIN)) #include -#include // For AVX2 intrinsics #include // For _mm_maddubs_epi16 +#include // For AVX2 intrinsics #ifdef __cplusplus namespace libyuv { @@ -102,91 +102,42 @@ extern "C" { _mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1); \ dst_argb += 32; -#if defined(HAS_ARGBTOYMATRIXROW_AVX2) +#if defined(HAS_I422TOARGBROW_SSSE3) + +#endif + +#if defined(HAS_I422ALPHATOARGBROW_SSSE3) + +#endif + +#if defined(HAS_I444TOARGBROW_SSSE3) + +#endif + +#if defined(HAS_I444ALPHATOARGBROW_SSSE3) + +#endif + +#if defined(HAS_ARGBTOYROW_AVX2) #if defined(__clang__) || defined(__GNUC__) #define LIBYUV_TARGET_AVX2 __attribute__((target("avx2"))) -#define LIBYUV_TARGET_AVX512BW \ - __attribute__((target("avx512bw,avx512vl,avx512f"))) +#define LIBYUV_TARGET_AVX512BW __attribute__((target("avx512bw,avx512vl,avx512f"))) #else #define LIBYUV_TARGET_AVX2 #define LIBYUV_TARGET_AVX512BW #endif -// Convert 32 ARGB pixels (128 bytes) to 32 UV444 values. 
-#if defined(HAS_ARGBTOYMATRIXROW_AVX2) || defined(HAS_ARGBTOUV444MATRIXROW_AVX2) -LIBYUV_TARGET_AVX2 -void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb, - uint8_t* dst_u, - uint8_t* dst_v, - int width, - const struct ArgbConstants* c) { - __m256i ymm_u = - _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU)); - __m256i ymm_v = - _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToV)); - __m256i ymm5 = _mm256_set1_epi16((short)0x8000); - __m256i perm_mask = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7); - - while (width > 0) { - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb); - __m256i ymm1 = _mm256_loadu_si256((const __m256i*)(src_argb + 32)); - __m256i ymm2 = _mm256_loadu_si256((const __m256i*)(src_argb + 64)); - __m256i ymm3 = _mm256_loadu_si256((const __m256i*)(src_argb + 96)); - src_argb += 128; - - __m256i ymm0_u = _mm256_maddubs_epi16(ymm0, ymm_u); - __m256i ymm1_u = _mm256_maddubs_epi16(ymm1, ymm_u); - __m256i ymm2_u = _mm256_maddubs_epi16(ymm2, ymm_u); - __m256i ymm3_u = _mm256_maddubs_epi16(ymm3, ymm_u); - - __m256i ymm0_v = _mm256_maddubs_epi16(ymm0, ymm_v); - __m256i ymm1_v = _mm256_maddubs_epi16(ymm1, ymm_v); - __m256i ymm2_v = _mm256_maddubs_epi16(ymm2, ymm_v); - __m256i ymm3_v = _mm256_maddubs_epi16(ymm3, ymm_v); - - ymm0_u = _mm256_hadd_epi16(ymm0_u, ymm1_u); - ymm2_u = _mm256_hadd_epi16(ymm2_u, ymm3_u); - - ymm0_v = _mm256_hadd_epi16(ymm0_v, ymm1_v); - ymm2_v = _mm256_hadd_epi16(ymm2_v, ymm3_v); - - ymm0_u = _mm256_sub_epi16(ymm5, ymm0_u); - ymm2_u = _mm256_sub_epi16(ymm5, ymm2_u); - - ymm0_v = _mm256_sub_epi16(ymm5, ymm0_v); - ymm2_v = _mm256_sub_epi16(ymm5, ymm2_v); - - ymm0_u = _mm256_srli_epi16(ymm0_u, 8); - ymm2_u = _mm256_srli_epi16(ymm2_u, 8); - - ymm0_v = _mm256_srli_epi16(ymm0_v, 8); - ymm2_v = _mm256_srli_epi16(ymm2_v, 8); - - ymm0_u = _mm256_packus_epi16(ymm0_u, ymm2_u); - ymm0_u = _mm256_permutevar8x32_epi32(ymm0_u, perm_mask); - - ymm0_v = _mm256_packus_epi16(ymm0_v, ymm2_v); - ymm0_v 
= _mm256_permutevar8x32_epi32(ymm0_v, perm_mask); - - _mm256_storeu_si256((__m256i*)dst_u, ymm0_u); - _mm256_storeu_si256((__m256i*)dst_v, ymm0_v); - dst_u += 32; - dst_v += 32; - width -= 32; - } -} -#endif LIBYUV_TARGET_AVX2 void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width, const struct ArgbConstants* c) { __m256i ymm5 = _mm256_set1_epi8((char)0x80); - __m256i ymm4 = - _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToY)); - __m256i ymm7 = - _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kAddY)); + __m128i kRGBToY = _mm_loadu_si128((const __m128i*)c->kRGBToY); + __m256i ymm4 = _mm256_broadcastsi128_si256(kRGBToY); + __m128i kAddY = _mm_loadu_si128((const __m128i*)c->kAddY); + __m256i ymm7 = _mm256_broadcastsi128_si256(kAddY); __m256i ymm6 = _mm256_maddubs_epi16(ymm4, ymm5); ymm6 = _mm256_hadd_epi16(ymm6, ymm6); ymm7 = _mm256_sub_epi16(ymm7, ymm6); @@ -266,33 +217,27 @@ void BGRAToYRow_AVX2(const uint8_t* src_bgra, uint8_t* dst_y, int width) { LIBYUV_TARGET_AVX2 void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width) { __m256i ymm_alpha = _mm256_set1_epi32(0xff000000); - __m128i shuf_low = - _mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2); - __m128i shuf_high = - _mm_set_epi8(-1, 13, 14, 15, -1, 10, 11, 12, -1, 7, 8, 9, -1, 4, 5, 6); + __m128i shuf_low = _mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2); + __m128i shuf_high = _mm_set_epi8(-1, 13, 14, 15, -1, 10, 11, 12, -1, 7, 8, 9, -1, 4, 5, 6); __m256i ymm_shuf = _mm256_broadcastsi128_si256(shuf_low); __m256i ymm_shuf2 = _mm256_broadcastsi128_si256(shuf_high); while (width > 0) { __m128i xmm0 = _mm_loadu_si128((const __m128i*)src_raw); __m256i ymm0 = _mm256_castsi128_si256(xmm0); - ymm0 = _mm256_inserti128_si256( - ymm0, _mm_loadu_si128((const __m128i*)(src_raw + 12)), 1); + ymm0 = _mm256_inserti128_si256(ymm0, _mm_loadu_si128((const __m128i*)(src_raw + 12)), 1); __m128i xmm1 = 
_mm_loadu_si128((const __m128i*)(src_raw + 24)); __m256i ymm1 = _mm256_castsi128_si256(xmm1); - ymm1 = _mm256_inserti128_si256( - ymm1, _mm_loadu_si128((const __m128i*)(src_raw + 36)), 1); + ymm1 = _mm256_inserti128_si256(ymm1, _mm_loadu_si128((const __m128i*)(src_raw + 36)), 1); __m128i xmm2 = _mm_loadu_si128((const __m128i*)(src_raw + 48)); __m256i ymm2 = _mm256_castsi128_si256(xmm2); - ymm2 = _mm256_inserti128_si256( - ymm2, _mm_loadu_si128((const __m128i*)(src_raw + 60)), 1); + ymm2 = _mm256_inserti128_si256(ymm2, _mm_loadu_si128((const __m128i*)(src_raw + 60)), 1); __m128i xmm3 = _mm_loadu_si128((const __m128i*)(src_raw + 68)); __m256i ymm3 = _mm256_castsi128_si256(xmm3); - ymm3 = _mm256_inserti128_si256( - ymm3, _mm_loadu_si128((const __m128i*)(src_raw + 80)), 1); + ymm3 = _mm256_inserti128_si256(ymm3, _mm_loadu_si128((const __m128i*)(src_raw + 80)), 1); ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf); ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf); @@ -318,13 +263,10 @@ void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width) { #ifdef HAS_RAWTOARGBROW_AVX512BW LIBYUV_TARGET_AVX512BW -void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, - uint8_t* dst_argb, - const __m128i* shuffler, - int width) { +void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const __m128i* shuffler, int width) { __m512i zmm_alpha = _mm512_set1_epi32(0xff000000); - __m512i zmm_perm = - _mm512_set_epi32(12, 11, 10, 9, 9, 8, 7, 6, 6, 5, 4, 3, 3, 2, 1, 0); + __m512i zmm_perm = _mm512_set_epi32( + 12, 11, 10, 9, 9, 8, 7, 6, 6, 5, 4, 3, 3, 2, 1, 0); __m512i zmm_shuf = _mm512_broadcast_i32x4(_mm_loadu_si128(shuffler)); while (width > 0) { @@ -360,26 +302,20 @@ void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, } LIBYUV_TARGET_AVX512BW -void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, - uint8_t* dst_argb, - int width) { - __m128i shuf = - _mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2); +void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, 
uint8_t* dst_argb, int width) { + __m128i shuf = _mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2); RGBToARGBRow_AVX512BW(src_raw, dst_argb, &shuf, width); } LIBYUV_TARGET_AVX512BW -void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - __m128i shuf = - _mm_set_epi8(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0); +void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { + __m128i shuf = _mm_set_epi8(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0); RGBToARGBRow_AVX512BW(src_rgb24, dst_argb, &shuf, width); } #endif #ifdef HAS_ARGBTOUVMATRIXROW_AVX2 -LIBYUV_TARGET_AVX2 +LIBYUV_TARGET_AVX2 __attribute__((no_sanitize("cfi-icall"))) void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, @@ -389,19 +325,16 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb, __m256i ymm_u = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU)); __m256i ymm_v = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToV)); __m256i ymm_0101 = _mm256_set1_epi16(0x0101); - __m256i ymm_shuf = - _mm256_setr_epi8(0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15, 0, - 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15); + __m256i ymm_shuf = _mm256_setr_epi8(0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15, + 0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15); __m256i ymm_8000 = _mm256_set1_epi16((short)0x8000); __m256i ymm_zero = _mm256_setzero_si256(); while (width > 0) { __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb); __m256i ymm1 = _mm256_loadu_si256((const __m256i*)(src_argb + 32)); - __m256i ymm2 = - _mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb)); - __m256i ymm3 = - _mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb + 32)); + __m256i ymm2 = _mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb)); + __m256i ymm3 = _mm256_loadu_si256((const __m256i*)(src_argb + 
src_stride_argb + 32)); ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf); ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf); @@ -470,515 +403,12 @@ void MergeUVRow_AVX2(const uint8_t* src_u, } #endif -#ifdef HAS_MIRRORROW_AVX2 -LIBYUV_TARGET_AVX2 -void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { - __m256i ymm_shuf = _mm256_broadcastsi128_si256( - _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); - src += width; - while (width > 0) { - src -= 32; - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src); - ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf); - ymm0 = _mm256_permute4x64_epi64(ymm0, 0x4e); - _mm256_storeu_si256((__m256i*)dst, ymm0); - dst += 32; - width -= 32; - } -} #endif -#ifdef HAS_MIRRORUVROW_AVX2 -LIBYUV_TARGET_AVX2 -void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) { - __m256i ymm_shuf = _mm256_broadcastsi128_si256( - _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); - src_uv += width * 2; - while (width > 0) { - src_uv -= 32; - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_uv); - ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf); - ymm0 = _mm256_permute4x64_epi64(ymm0, 0x4e); - _mm256_storeu_si256((__m256i*)dst_uv, ymm0); - dst_uv += 32; - width -= 16; - } -} -#endif - -#ifdef HAS_MIRRORSPLITUVROW_AVX2 -LIBYUV_TARGET_AVX2 -void MirrorSplitUVRow_AVX2(const uint8_t* src_uv, - uint8_t* dst_u, - uint8_t* dst_v, - int width) { - __m256i ymm_shuf = _mm256_broadcastsi128_si256( - _mm_setr_epi8(14, 12, 10, 8, 6, 4, 2, 0, 15, 13, 11, 9, 7, 5, 3, 1)); - src_uv += width * 2; - while (width > 0) { - src_uv -= 32; - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_uv); - ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf); - ymm0 = _mm256_permute4x64_epi64(ymm0, 0x72); - _mm_storeu_si128((__m128i*)dst_u, _mm256_castsi256_si128(ymm0)); - _mm_storeu_si128((__m128i*)dst_v, _mm256_extracti128_si256(ymm0, 1)); - dst_u += 16; - dst_v += 16; - width -= 16; - } -} -#endif - -#ifdef HAS_RGB24MIRRORROW_AVX2 
-LIBYUV_TARGET_AVX2 -void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24, - uint8_t* dst_rgb24, - int width) { - __m256i shuf0 = - _mm256_setr_epi8(-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2, -1, - 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2); - __m128i shuf1 = - _mm_setr_epi8(13, 14, 15, 10, 11, 12, 7, 8, 9, 4, 5, 6, 1, 2, 3, -1); - - src_rgb24 += width * 3 - 96; - while (width > 0) { - __m128i v0_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 0)); - __m128i v0_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 15)); - __m256i v0 = - _mm256_inserti128_si256(_mm256_castsi128_si256(v0_lo), v0_hi, 1); - - __m128i v1_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 30)); - __m128i v1_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 45)); - __m256i v1 = - _mm256_inserti128_si256(_mm256_castsi128_si256(v1_lo), v1_hi, 1); - - __m128i v2_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 60)); - __m128i v2_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 75)); - __m256i v2 = - _mm256_inserti128_si256(_mm256_castsi128_si256(v2_lo), v2_hi, 1); - - __m128i v3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 80)); - - v0 = _mm256_shuffle_epi8(v0, shuf0); - v1 = _mm256_shuffle_epi8(v1, shuf0); - v2 = _mm256_shuffle_epi8(v2, shuf0); - v3 = _mm_shuffle_epi8(v3, shuf1); - - _mm_storeu_si128((__m128i*)(dst_rgb24 + 80), _mm256_castsi256_si128(v0)); - _mm_storeu_si128((__m128i*)(dst_rgb24 + 65), - _mm256_extracti128_si256(v0, 1)); - _mm_storeu_si128((__m128i*)(dst_rgb24 + 50), _mm256_castsi256_si128(v1)); - _mm_storeu_si128((__m128i*)(dst_rgb24 + 35), - _mm256_extracti128_si256(v1, 1)); - _mm_storeu_si128((__m128i*)(dst_rgb24 + 20), _mm256_castsi256_si128(v2)); - _mm_storeu_si128((__m128i*)(dst_rgb24 + 5), - _mm256_extracti128_si256(v2, 1)); - _mm_storel_epi64((__m128i*)(dst_rgb24 + 0), v3); - - src_rgb24 -= 96; - dst_rgb24 += 96; - width -= 32; - } -} -#endif - -#ifdef HAS_INTERPOLATEROW_AVX2 -LIBYUV_TARGET_AVX2 -void InterpolateRow_AVX2(uint8_t* dst_ptr, - const 
uint8_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction) { - int y1 = source_y_fraction; - int y0 = 256 - y1; - const uint8_t* src_ptr1 = src_ptr + src_stride; - __m256i ymm_y = _mm256_set1_epi16((y1 << 8) | y0); - __m256i ymm_8080 = _mm256_set1_epi16(0x8080); - int i; - - if (y1 == 0) { - for (i = 0; i < width; i += 32) { - _mm256_storeu_si256((__m256i*)(dst_ptr + i), - _mm256_loadu_si256((const __m256i*)(src_ptr + i))); - } - } else if (y1 == 128) { - for (i = 0; i < width; i += 32) { - __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i)); - __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i)); - _mm256_storeu_si256((__m256i*)(dst_ptr + i), _mm256_avg_epu8(row0, row1)); - } - } else { - for (i = 0; i < width; i += 32) { - __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i)); - __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i)); - __m256i low = _mm256_unpacklo_epi8(row0, row1); - __m256i high = _mm256_unpackhi_epi8(row0, row1); - low = _mm256_sub_epi8(low, ymm_8080); - high = _mm256_sub_epi8(high, ymm_8080); - low = _mm256_maddubs_epi16(ymm_y, low); - high = _mm256_maddubs_epi16(ymm_y, high); - low = _mm256_add_epi16(low, ymm_8080); - high = _mm256_add_epi16(high, ymm_8080); - low = _mm256_srli_epi16(low, 8); - high = _mm256_srli_epi16(high, 8); - _mm256_storeu_si256((__m256i*)(dst_ptr + i), - _mm256_packus_epi16(low, high)); - } - } - _mm256_zeroupper(); -} -#endif - -#ifdef HAS_INTERPOLATEROW_16_AVX2 -LIBYUV_TARGET_AVX2 -void InterpolateRow_16_AVX2(uint16_t* dst_ptr, - const uint16_t* src_ptr, - ptrdiff_t src_stride, - int width, - int source_y_fraction) { - int y1 = source_y_fraction; - int y0 = 256 - y1; - const uint16_t* src_ptr1 = src_ptr + src_stride; - __m256i ymm_y = _mm256_set1_epi32((y1 << 16) | y0); - __m256i ymm_8000 = _mm256_set1_epi16((short)0x8000); - __m256i ymm_round = _mm256_set1_epi32(8388736); // 0x800000 + 128 - int i; - - if (y1 == 0) { - for (i = 0; i < width; i += 
16) { - _mm256_storeu_si256((__m256i*)(dst_ptr + i), - _mm256_loadu_si256((const __m256i*)(src_ptr + i))); - } - } else if (y1 == 128) { - for (i = 0; i < width; i += 16) { - __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i)); - __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i)); - _mm256_storeu_si256((__m256i*)(dst_ptr + i), - _mm256_avg_epu16(row0, row1)); - } - } else { - for (i = 0; i < width; i += 16) { - __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i)); - __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i)); - __m256i row0l = _mm256_unpacklo_epi16(row0, row1); - __m256i row0h = _mm256_unpackhi_epi16(row0, row1); - row0l = _mm256_sub_epi16(row0l, ymm_8000); - row0h = _mm256_sub_epi16(row0h, ymm_8000); - __m256i resl = _mm256_madd_epi16(row0l, ymm_y); - __m256i resh = _mm256_madd_epi16(row0h, ymm_y); - resl = _mm256_add_epi32(resl, ymm_round); - resh = _mm256_add_epi32(resh, ymm_round); - resl = _mm256_srai_epi32(resl, 8); - resh = _mm256_srai_epi32(resh, 8); - _mm256_storeu_si256((__m256i*)(dst_ptr + i), - _mm256_packus_epi32(resl, resh)); - } - } - _mm256_zeroupper(); -} -#endif - -#ifdef HAS_ARGBMIRRORROW_AVX2 -LIBYUV_TARGET_AVX2 -void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { - __m256i ymm_shuf = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0); - src += width * 4; - while (width > 0) { - src -= 32; - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src); - ymm0 = _mm256_permutevar8x32_epi32(ymm0, ymm_shuf); - _mm256_storeu_si256((__m256i*)dst, ymm0); - dst += 32; - width -= 8; - } -} -#endif - -#ifdef HAS_J400TOARGBROW_AVX2 -alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_0[32] = { - 0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u, 2u, 2u, 2u, 128u, 3u, 3u, 3u, 128u, - 4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u, 6u, 6u, 6u, 128u, 7u, 7u, 7u, 128u}; -alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_1[32] = { - 8u, 8u, 8u, 128u, 9u, 9u, 9u, 128u, 10u, 10u, 10u, - 128u, 11u, 11u, 11u, 128u, 
12u, 12u, 12u, 128u, 13u, 13u, - 13u, 128u, 14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u}; - -LIBYUV_TARGET_AVX2 -void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) { - __m256i ymm_mask0 = - _mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_0); - __m256i ymm_mask1 = - _mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_1); - __m256i ymm_alpha = _mm256_set1_epi32((int)0xff000000u); - - while (width > 0) { - __m256i ymm0 = - _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)src_y)); - - __m256i ymm1 = _mm256_shuffle_epi8(ymm0, ymm_mask0); - __m256i ymm2 = _mm256_shuffle_epi8(ymm0, ymm_mask1); - - ymm1 = _mm256_or_si256(ymm1, ymm_alpha); - ymm2 = _mm256_or_si256(ymm2, ymm_alpha); - - _mm256_storeu_si256((__m256i*)dst_argb, ymm1); - _mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm2); - - src_y += 16; - dst_argb += 64; - width -= 16; - } -} -#endif // HAS_J400TOARGBROW_AVX2 - -#ifdef HAS_RGB24TOARGBROW_AVX2 -alignas(16) static const uint8_t kShuffleMaskRGB24ToARGB[2][16] = { - {0u, 1u, 2u, 128u, 3u, 4u, 5u, 128u, 6u, 7u, 8u, 128u, 9u, 10u, 11u, 128u}, - {4u, 5u, 6u, 128u, 7u, 8u, 9u, 128u, 10u, 11u, 12u, 128u, 13u, 14u, 15u, - 128u}}; -#endif - -#ifdef HAS_RGB565TOARGBROW_AVX2 -LIBYUV_TARGET_AVX2 -void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, - uint8_t* dst_argb, - int width) { - __m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108); - __m256i ymm_scale_g = _mm256_set1_epi32(0x20802080); - __m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800); - __m256i ymm_mask_g = _mm256_set1_epi16(0x07e0); - __m256i ymm_mask_a = _mm256_set1_epi16((short)0xff00); - - while (width > 0) { - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_rgb565); - __m256i ymm1 = ymm0; - __m256i ymm2 = ymm0; - - ymm1 = _mm256_and_si256(ymm1, ymm_mask_b); - ymm2 = _mm256_slli_epi16(ymm2, 11); - ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb); - ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb); - ymm1 = _mm256_slli_epi16(ymm1, 8); - ymm1 = 
_mm256_or_si256(ymm1, ymm2); // RB - - ymm0 = _mm256_and_si256(ymm0, ymm_mask_g); - ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g); - ymm0 = _mm256_or_si256(ymm0, ymm_mask_a); // GA - - ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0); - ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0); - - ymm0 = _mm256_permute2x128_si256(ymm2, ymm1, 0x20); - ymm1 = _mm256_permute2x128_si256(ymm2, ymm1, 0x31); - - _mm256_storeu_si256((__m256i*)dst_argb, ymm0); - _mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1); - - src_rgb565 += 32; - dst_argb += 64; - width -= 16; - } - _mm256_zeroupper(); -} -#endif - -#ifdef HAS_ARGB1555TOARGBROW_AVX2 -LIBYUV_TARGET_AVX2 -void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, - uint8_t* dst_argb, - int width) { - __m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108); - __m256i ymm_scale_g = _mm256_set1_epi32(0x42004200); - __m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800); - __m256i ymm_mask_g = _mm256_set1_epi16(0x03e0); - __m256i ymm_mask_a = _mm256_set1_epi16((short)0xff00); - - while (width > 0) { - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb1555); - __m256i ymm1 = ymm0; - __m256i ymm2 = ymm0; - - ymm1 = _mm256_slli_epi16(ymm1, 1); - ymm2 = _mm256_slli_epi16(ymm2, 11); - ymm1 = _mm256_and_si256(ymm1, ymm_mask_b); - ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb); - ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb); - ymm1 = _mm256_slli_epi16(ymm1, 8); - ymm1 = _mm256_or_si256(ymm1, ymm2); // RB - - ymm2 = ymm0; - ymm0 = _mm256_and_si256(ymm0, ymm_mask_g); - ymm2 = _mm256_srai_epi16(ymm2, 8); - ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g); - ymm2 = _mm256_and_si256(ymm2, ymm_mask_a); - ymm0 = _mm256_or_si256(ymm0, ymm2); // GA - - ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0); - ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0); - - ymm0 = _mm256_permute2x128_si256(ymm2, ymm1, 0x20); - ymm1 = _mm256_permute2x128_si256(ymm2, ymm1, 0x31); - - _mm256_storeu_si256((__m256i*)dst_argb, ymm0); - _mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1); - - src_argb1555 
+= 32; - dst_argb += 64; - width -= 16; - } - _mm256_zeroupper(); -} -#endif - -#ifdef HAS_ARGB4444TOARGBROW_AVX2 -LIBYUV_TARGET_AVX2 -void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, - uint8_t* dst_argb, - int width) { - __m256i ymm_mask = _mm256_set1_epi32(0x0f0f0f0f); - __m256i ymm_mask2 = _mm256_slli_epi32(ymm_mask, 4); - - while (width > 0) { - __m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb4444); - __m256i ymm2 = ymm0; - - ymm0 = _mm256_and_si256(ymm0, ymm_mask); - ymm2 = _mm256_and_si256(ymm2, ymm_mask2); - - __m256i ymm1 = ymm0; - __m256i ymm3 = ymm2; - - ymm1 = _mm256_slli_epi16(ymm1, 4); - ymm3 = _mm256_srli_epi16(ymm3, 4); - - ymm0 = _mm256_or_si256(ymm0, ymm1); - ymm2 = _mm256_or_si256(ymm2, ymm3); - - ymm1 = ymm0; - ymm0 = _mm256_unpacklo_epi8(ymm0, ymm2); - ymm1 = _mm256_unpackhi_epi8(ymm1, ymm2); - - ymm2 = _mm256_permute2x128_si256(ymm0, ymm1, 0x20); - ymm1 = _mm256_permute2x128_si256(ymm0, ymm1, 0x31); - - _mm256_storeu_si256((__m256i*)dst_argb, ymm2); - _mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1); - - src_argb4444 += 32; - dst_argb += 64; - width -= 16; - } - _mm256_zeroupper(); -} -#endif - -#ifdef HAS_RGB24TOARGBROW_AVX2 -LIBYUV_TARGET_AVX2 -void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24, - uint8_t* dst_argb, - int width) { - __m256i ymm_alpha = _mm256_set1_epi32(0xff000000); - __m256i ymm_shuf = _mm256_broadcastsi128_si256( - _mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[0])); - __m256i ymm_shuf2 = _mm256_broadcastsi128_si256( - _mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[1])); - - while (width > 0) { - __m128i xmm0 = _mm_loadu_si128((const __m128i*)src_rgb24); - __m256i ymm0 = _mm256_castsi128_si256(xmm0); - ymm0 = _mm256_inserti128_si256( - ymm0, _mm_loadu_si128((const __m128i*)(src_rgb24 + 12)), 1); - - __m128i xmm1 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 24)); - __m256i ymm1 = _mm256_castsi128_si256(xmm1); - ymm1 = _mm256_inserti128_si256( - ymm1, _mm_loadu_si128((const 
__m128i*)(src_rgb24 + 36)), 1); - - __m128i xmm2 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 48)); - __m256i ymm2 = _mm256_castsi128_si256(xmm2); - ymm2 = _mm256_inserti128_si256( - ymm2, _mm_loadu_si128((const __m128i*)(src_rgb24 + 60)), 1); - - __m128i xmm3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 68)); - __m256i ymm3 = _mm256_castsi128_si256(xmm3); - ymm3 = _mm256_inserti128_si256( - ymm3, _mm_loadu_si128((const __m128i*)(src_rgb24 + 80)), 1); - - ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf); - ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf); - ymm2 = _mm256_shuffle_epi8(ymm2, ymm_shuf); - ymm3 = _mm256_shuffle_epi8(ymm3, ymm_shuf2); - - ymm0 = _mm256_or_si256(ymm0, ymm_alpha); - ymm1 = _mm256_or_si256(ymm1, ymm_alpha); - ymm2 = _mm256_or_si256(ymm2, ymm_alpha); - ymm3 = _mm256_or_si256(ymm3, ymm_alpha); - - _mm256_storeu_si256((__m256i*)dst_argb, ymm0); - _mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1); - _mm256_storeu_si256((__m256i*)(dst_argb + 64), ymm2); - _mm256_storeu_si256((__m256i*)(dst_argb + 96), ymm3); - - src_rgb24 += 96; - dst_argb += 128; - width -= 32; - } - _mm256_zeroupper(); -} -#endif - -#ifdef HAS_ARGBSHUFFLEROW_AVX2 -LIBYUV_TARGET_AVX2 -void ARGBShuffleRow_AVX2(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width) { - __m256i control = - _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)shuffler)); - while (width >= 16) { - __m256i row = _mm256_loadu_si256((const __m256i*)src_argb); - __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_argb + 32)); - row = _mm256_shuffle_epi8(row, control); - row1 = _mm256_shuffle_epi8(row1, control); - _mm256_storeu_si256((__m256i*)dst_argb, row); - _mm256_storeu_si256((__m256i*)(dst_argb + 32), row1); - src_argb += 64; - dst_argb += 64; - width -= 16; - } -} -#endif - -#ifdef HAS_ARGBSHUFFLEROW_AVX512BW -LIBYUV_TARGET_AVX512BW -void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb, - uint8_t* dst_argb, - const uint8_t* shuffler, - int width) { - __m512i 
control = - _mm512_broadcast_i32x4(_mm_loadu_si128((const __m128i*)shuffler)); - while (width >= 32) { - __m512i row = _mm512_loadu_si512((const __m512i*)src_argb); - __m512i row1 = _mm512_loadu_si512((const __m512i*)(src_argb + 64)); - row = _mm512_shuffle_epi8(row, control); - row1 = _mm512_shuffle_epi8(row1, control); - _mm512_storeu_si512((__m512i*)dst_argb, row); - _mm512_storeu_si512((__m512i*)(dst_argb + 64), row1); - src_argb += 128; - dst_argb += 128; - width -= 32; - } -} -#endif - -#endif #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif -#endif // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || - // defined(__i386__) || defined(_M_X64) || defined(_M_X86)) && - // ((defined(_MSC_VER) && !defined(__clang__)) || - // defined(LIBYUV_ENABLE_ROWWIN)) +#endif // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_X86)) && ((defined(_MSC_VER) && !defined(__clang__)) || defined(LIBYUV_ENABLE_ROWWIN)) diff --git a/source/scale.cc b/source/scale.cc index 4b7b2d3bc..9c1e9b264 100644 --- a/source/scale.cc +++ b/source/scale.cc @@ -11,7 +11,6 @@ #include "libyuv/scale.h" #include -#include #include #include "libyuv/cpu_id.h" @@ -40,8 +39,8 @@ static void ScalePlaneDown2(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -52,7 +51,7 @@ static void ScalePlaneDown2(int src_width, ? ScaleRowDown2_C : (filtering == kFilterLinear ? 
ScaleRowDown2Linear_C : ScaleRowDown2Box_C); - ptrdiff_t row_stride = src_stride * 2; + int row_stride = src_stride * 2; (void)src_width; (void)src_height; if (!filtering) { @@ -152,8 +151,8 @@ static void ScalePlaneDown2_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -164,7 +163,7 @@ static void ScalePlaneDown2_16(int src_width, ? ScaleRowDown2_16_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C : ScaleRowDown2Box_16_C); - ptrdiff_t row_stride = src_stride * 2; + int row_stride = src_stride * 2; (void)src_width; (void)src_height; if (!filtering) { @@ -229,7 +228,7 @@ void ScalePlaneDown2_16To8(int src_width, ? ScaleRowDown2_16To8_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C : ScaleRowDown2Box_16To8_C)); - ptrdiff_t row_stride = (ptrdiff_t)src_stride * 2; + int row_stride = src_stride * 2; (void)dst_height; if (!filtering) { src_ptr += src_stride; // Point to odd rows. @@ -260,8 +259,8 @@ static void ScalePlaneDown4(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -269,7 +268,7 @@ static void ScalePlaneDown4(int src_width, void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) = filtering ? 
ScaleRowDown4Box_C : ScaleRowDown4_C; - ptrdiff_t row_stride = src_stride * 4; + int row_stride = src_stride * 4; (void)src_width; (void)src_height; if (!filtering) { @@ -332,8 +331,8 @@ static void ScalePlaneDown4_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -341,7 +340,7 @@ static void ScalePlaneDown4_16(int src_width, void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; - ptrdiff_t row_stride = src_stride * 4; + int row_stride = src_stride * 4; (void)src_width; (void)src_height; if (!filtering) { @@ -376,8 +375,8 @@ static void ScalePlaneDown34(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -386,7 +385,7 @@ static void ScalePlaneDown34(int src_width, uint8_t* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); - const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); @@ -503,8 +502,8 @@ static void ScalePlaneDown34_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -513,7 +512,7 @@ static void ScalePlaneDown34_16(int src_width, uint16_t* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); - const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); @@ -589,8 +588,8 @@ static void ScalePlaneDown38(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -599,7 +598,7 @@ static void ScalePlaneDown38(int src_width, uint8_t* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); - const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; assert(dst_width % 3 == 0); (void)src_width; (void)src_height; @@ -709,8 +708,8 @@ static void ScalePlaneDown38_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -719,7 +718,7 @@ static void ScalePlaneDown38_16(int src_width, uint16_t* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); - const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; + const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); @@ -902,8 +901,8 @@ static int ScalePlaneBox(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { int j, k; @@ -968,7 +967,7 @@ static int ScalePlaneBox(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint8_t* src = src_ptr + iy * src_stride; + const uint8_t* src = src_ptr + iy * (int64_t)src_stride; y += dy; if (y > max_y) { y = max_y; @@ -991,8 +990,8 @@ static int ScalePlaneBox_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { int j, k; @@ -1025,7 +1024,7 @@ static int ScalePlaneBox_16(int src_width, for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; - const uint16_t* src = src_ptr + iy * src_stride; + const uint16_t* src = src_ptr + iy * (int64_t)src_stride; y += dy; if (y > max_y) { y = max_y; @@ -1049,8 +1048,8 @@ static int ScalePlaneBilinearDown(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int 
src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -1077,6 +1076,14 @@ static int ScalePlaneBilinearDown(int src_width, &dx, &dy); src_width = Abs(src_width); +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(src_width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -1139,7 +1146,7 @@ static int ScalePlaneBilinearDown(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * (int64_t)src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1161,8 +1168,8 @@ static int ScalePlaneBilinearDown_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -1189,6 +1196,14 @@ static int ScalePlaneBilinearDown_16(int src_width, &dx, &dy); src_width = Abs(src_width); +#if defined(HAS_INTERPOLATEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + InterpolateRow = InterpolateRow_16_Any_SSE2; + if (IS_ALIGNED(src_width, 16)) { + InterpolateRow = InterpolateRow_16_SSE2; + } + } +#endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_16_Any_SSSE3; @@ -1230,7 +1245,7 @@ static int ScalePlaneBilinearDown_16(int src_width, for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint16_t* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * (int64_t)src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { @@ -1253,8 +1268,8 @@ static int ScalePlaneBilinearUp(int src_width, int 
src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { @@ -1275,6 +1290,14 @@ static int ScalePlaneBilinearUp(int src_width, &dx, &dy); src_width = Abs(src_width); +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(dst_width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -1340,7 +1363,7 @@ static int ScalePlaneBilinearUp(int src_width, } { int yi = y >> 16; - const uint8_t* src = src_ptr + yi * src_stride; + const uint8_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. const int row_size = (dst_width + 31) & ~31; @@ -1349,7 +1372,7 @@ static int ScalePlaneBilinearUp(int src_width, return 1; uint8_t* rowptr = row; - ptrdiff_t rowstride = row_size; + int rowstride = row_size; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1367,7 +1390,7 @@ static int ScalePlaneBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_ptr + yi * src_stride; + src = src_ptr + yi * (int64_t)src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1402,8 +1425,8 @@ static void ScalePlaneUp2_Linear(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) = @@ -1446,13 +1469,13 @@ static void ScalePlaneUp2_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, dst_width); } else { dy = 
FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1467,8 +1490,8 @@ static void ScalePlaneUp2_Bilinear(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride, @@ -1533,8 +1556,8 @@ static void ScalePlaneUp2_12_Linear(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, @@ -1566,13 +1589,13 @@ static void ScalePlaneUp2_12_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1588,8 +1611,8 @@ static void ScalePlaneUp2_12_Bilinear(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, @@ -1636,8 +1659,8 @@ static void ScalePlaneUp2_16_Linear(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* 
src_ptr, uint16_t* dst_ptr) { void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr, @@ -1669,13 +1692,13 @@ static void ScalePlaneUp2_16_Linear(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr, + ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr, dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width); + ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width); dst_ptr += dst_stride; y += dy; } @@ -1686,8 +1709,8 @@ static void ScalePlaneUp2_16_Bilinear(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride, @@ -1734,8 +1757,8 @@ static int ScalePlaneBilinearUp_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { @@ -1756,6 +1779,14 @@ static int ScalePlaneBilinearUp_16(int src_width, &dx, &dy); src_width = Abs(src_width); +#if defined(HAS_INTERPOLATEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + InterpolateRow = InterpolateRow_16_Any_SSE2; + if (IS_ALIGNED(dst_width, 16)) { + InterpolateRow = InterpolateRow_16_SSE2; + } + } +#endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_16_Any_SSSE3; @@ -1807,12 +1838,12 @@ static int ScalePlaneBilinearUp_16(int src_width, } { int yi = y >> 16; - const uint16_t* src = src_ptr + yi * src_stride; + const uint16_t* src = src_ptr + yi * (int64_t)src_stride; // Allocate 2 row buffers. 
const int row_size = (dst_width + 31) & ~31; align_buffer_64(row, row_size * 4); - ptrdiff_t rowstride = row_size; + int rowstride = row_size; int lasty = yi; uint16_t* rowptr = (uint16_t*)row; if (!row) @@ -1833,7 +1864,7 @@ static int ScalePlaneBilinearUp_16(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_ptr + yi * src_stride; + src = src_ptr + yi * (int64_t)src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); @@ -1868,8 +1899,8 @@ static void ScalePlaneSimple(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { int i; @@ -1894,7 +1925,8 @@ static void ScalePlaneSimple(int src_width, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, + dx); dst_ptr += dst_stride; y += dy; } @@ -1904,8 +1936,8 @@ static void ScalePlaneSimple_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { int i; @@ -1930,7 +1962,8 @@ static void ScalePlaneSimple_16(int src_width, } for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); + ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x, + dx); dst_ptr += dst_stride; y += dy; } @@ -1948,14 +1981,6 @@ int ScalePlane(const uint8_t* src, int dst_width, int dst_height, enum FilterMode filtering) { - // Reject dimensions larger than 32768 (or smaller than -32768 for height). - // This prevents FixedDiv signed integer overflows that can lead to division - // by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations. 
- if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 || - src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 || - dst_height <= 0) { - return -1; - } // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -1963,7 +1988,7 @@ int ScalePlane(const uint8_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (ptrdiff_t)src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } // Use specialized scales to improve performance for common resolutions. @@ -2056,14 +2081,6 @@ int ScalePlane_16(const uint16_t* src, int dst_width, int dst_height, enum FilterMode filtering) { - // Reject dimensions larger than 32768 (or smaller than -32768 for height). - // This prevents FixedDiv signed integer overflows that can lead to division - // by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations. - if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 || - src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 || - dst_height <= 0) { - return -1; - } // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2071,7 +2088,7 @@ int ScalePlane_16(const uint16_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (ptrdiff_t)src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } // Use specialized scales to improve performance for common resolutions. @@ -2168,14 +2185,6 @@ int ScalePlane_12(const uint16_t* src, int dst_width, int dst_height, enum FilterMode filtering) { - // Reject dimensions larger than 32768 (or smaller than -32768 for height). 
- // This prevents FixedDiv signed integer overflows that can lead to division - // by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations. - if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 || - src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 || - dst_height <= 0) { - return -1; - } // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); @@ -2183,7 +2192,7 @@ int ScalePlane_12(const uint16_t* src, // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (ptrdiff_t)src_stride; + src = src + (src_height - 1) * (int64_t)src_stride; src_stride = -src_stride; } @@ -2224,17 +2233,17 @@ int I420Scale(const uint8_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { - int r; - - if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { - return -1; - } int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; + + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { + return -1; + } r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2269,17 +2278,17 @@ int I420Scale_16(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { - int r; - - if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { - return -1; - } int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = 
SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; + + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { + return -1; + } r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2314,17 +2323,17 @@ int I420Scale_12(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { - int r; - - if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { - return -1; - } int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; + + if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { + return -1; + } r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2365,8 +2374,8 @@ int I444Scale(const uint8_t* src_y, int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } @@ -2406,8 +2415,8 @@ int I444Scale_16(const uint16_t* src_y, int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || 
dst_height <= 0) { return -1; } @@ -2447,8 +2456,8 @@ int I444Scale_12(const uint16_t* src_y, int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } @@ -2488,15 +2497,15 @@ int I422Scale(const uint8_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int src_halfwidth = SUBSAMPLE(src_width, 1, 1); + int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2531,15 +2540,15 @@ int I422Scale_16(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + int src_halfwidth = SUBSAMPLE(src_width, 1, 1); + int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2574,15 +2583,15 @@ int I422Scale_12(const uint16_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { + 
int src_halfwidth = SUBSAMPLE(src_width, 1, 1); + int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int r; if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || + dst_width <= 0 || dst_height <= 0) { return -1; } - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2616,17 +2625,17 @@ int NV12Scale(const uint8_t* src_y, int dst_width, int dst_height, enum FilterMode filtering) { - int r; - - if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_uv || dst_width <= 0 || - dst_height <= 0) { - return -1; - } int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); + int r; + + if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || + src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || + dst_width <= 0 || dst_height <= 0) { + return -1; + } r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); @@ -2655,8 +2664,8 @@ int NV24Scale(const uint8_t* src_y, int r; if (!src_y || !src_uv || src_width <= 0 || src_height == 0 || - src_height == INT_MIN || !dst_y || !dst_uv || dst_width <= 0 || - dst_height <= 0) { + src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv || + dst_width <= 0 || dst_height <= 0) { return -1; } diff --git a/source/scale_argb.cc b/source/scale_argb.cc index 4dc446d5e..506409c15 100644 --- a/source/scale_argb.cc +++ b/source/scale_argb.cc @@ -38,8 +38,8 @@ static void ScaleARGBDown2(int src_width, int src_height, int dst_width, int 
dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -48,7 +48,7 @@ static void ScaleARGBDown2(int src_width, int dy, enum FilterMode filtering) { int j; - ptrdiff_t row_stride = src_stride * (dy >> 16); + int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = filtering == kFilterNone @@ -62,9 +62,9 @@ static void ScaleARGBDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. if (filtering == kFilterBilinear) { - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; } else { - src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4; } #if defined(HAS_SCALEARGBROWDOWN2_SSE2) @@ -152,8 +152,8 @@ static int ScaleARGBDown4Box(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -169,12 +169,12 @@ static int ScaleARGBDown4Box(int src_width, align_buffer_64(row, row_size * 2); if (!row) return 1; - ptrdiff_t row_stride = src_stride * (dy >> 16); + int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. 
- src_argb += (y >> 16) * src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; (void)src_width; (void)src_height; (void)dx; @@ -226,8 +226,8 @@ static void ScaleARGBDownEven(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -237,7 +237,7 @@ static void ScaleARGBDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - ptrdiff_t row_stride = (dy >> 16) * src_stride; + ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, int src_step, uint8_t* dst_argb, int dst_width) = filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; @@ -245,7 +245,7 @@ static void ScaleARGBDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; + src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 @@ -302,8 +302,8 @@ static int ScaleARGBBilinearDown(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -331,6 +331,14 @@ static int ScaleARGBBilinearDown(int src_width, clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. 
src_argb += xl * 4; x -= (int)(xl << 16); +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(clip_src_width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -405,7 +413,7 @@ static int ScaleARGBBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * (intptr_t)src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { @@ -429,8 +437,8 @@ static int ScaleARGBBilinearUp(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -446,6 +454,14 @@ static int ScaleARGBBilinearUp(int src_width, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; const int max_y = (src_height - 1) << 16; +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(dst_width, 4)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -546,7 +562,7 @@ static int ScaleARGBBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_argb + yi * src_stride; + const uint8_t* src = src_argb + yi * (intptr_t)src_stride; // Allocate 2 rows of ARGB. 
const int row_size = (dst_width * 4 + 31) & ~31; @@ -555,7 +571,7 @@ static int ScaleARGBBilinearUp(int src_width, return 1; uint8_t* rowptr = row; - ptrdiff_t rowstride = row_size; + int rowstride = row_size; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -573,7 +589,7 @@ static int ScaleARGBBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_argb + yi * src_stride; + src = src_argb + yi * (intptr_t)src_stride; } if (yi != lasty) { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); @@ -599,6 +615,283 @@ static int ScaleARGBBilinearUp(int src_width, return 0; } +#ifdef YUVSCALEUP +// Scale YUV to ARGB up with bilinear interpolation. +static int ScaleYUVToARGBBilinearUp(int src_width, + int src_height, + int dst_width, + int dst_height, + int src_stride_y, + int src_stride_u, + int src_stride_v, + int dst_stride_argb, + const uint8_t* src_y, + const uint8_t* src_u, + const uint8_t* src_v, + uint8_t* dst_argb, + int x, + int dx, + int y, + int dy, + enum FilterMode filtering) { + int j; + void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, + const uint8_t* v_buf, uint8_t* rgb_buf, int width) = + I422ToARGBRow_C; +#if defined(HAS_I422TOARGBROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + I422ToARGBRow = I422ToARGBRow_Any_SSSE3; + if (IS_ALIGNED(src_width, 8)) { + I422ToARGBRow = I422ToARGBRow_SSSE3; + } + } +#endif +#if defined(HAS_I422TOARGBROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + I422ToARGBRow = I422ToARGBRow_Any_AVX2; + if (IS_ALIGNED(src_width, 16)) { + I422ToARGBRow = I422ToARGBRow_AVX2; + } + } +#endif +#if defined(HAS_I422TOARGBROW_AVX512BW) + if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) == + (kCpuHasAVX512BW | kCpuHasAVX512VL)) { + I422ToARGBRow = I422ToARGBRow_Any_AVX512BW; + if (IS_ALIGNED(src_width, 32)) { + I422ToARGBRow = I422ToARGBRow_AVX512BW; + } + } +#endif +#if defined(HAS_I422TOARGBROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + I422ToARGBRow = I422ToARGBRow_Any_NEON; + if 
(IS_ALIGNED(src_width, 8)) { + I422ToARGBRow = I422ToARGBRow_NEON; + } + } +#endif +#if defined(HAS_I422TOARGBROW_SVE2) + if (TestCpuFlag(kCpuHasSVE2)) { + I422ToARGBRow = I422ToARGBRow_SVE2; + } +#endif +#if defined(HAS_I422TOARGBROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + I422ToARGBRow = I422ToARGBRow_SME; + } +#endif +#if defined(HAS_I422TOARGBROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + I422ToARGBRow = I422ToARGBRow_Any_LSX; + if (IS_ALIGNED(src_width, 16)) { + I422ToARGBRow = I422ToARGBRow_LSX; + } + } +#endif +#if defined(HAS_I422TOARGBROW_LASX) + if (TestCpuFlag(kCpuHasLASX)) { + I422ToARGBRow = I422ToARGBRow_Any_LASX; + if (IS_ALIGNED(src_width, 32)) { + I422ToARGBRow = I422ToARGBRow_LASX; + } + } +#endif +#if defined(HAS_I422TOARGBROW_RVV) + if (TestCpuFlag(kCpuHasRVV)) { + I422ToARGBRow = I422ToARGBRow_RVV; + } +#endif + + void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb, + ptrdiff_t src_stride, int dst_width, + int source_y_fraction) = InterpolateRow_C; +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(dst_width, 4)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_AVX2) + if (TestCpuFlag(kCpuHasAVX2)) { + InterpolateRow = InterpolateRow_Any_AVX2; + if (IS_ALIGNED(dst_width, 8)) { + InterpolateRow = InterpolateRow_AVX2; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + InterpolateRow = InterpolateRow_Any_NEON; + if (IS_ALIGNED(dst_width, 4)) { + InterpolateRow = InterpolateRow_NEON; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_SME) + if (TestCpuFlag(kCpuHasSME)) { + InterpolateRow = InterpolateRow_SME; + } +#endif +#if defined(HAS_INTERPOLATEROW_LSX) + if (TestCpuFlag(kCpuHasLSX)) { + InterpolateRow = InterpolateRow_Any_LSX; + if (IS_ALIGNED(dst_width, 8)) { + InterpolateRow = InterpolateRow_LSX; + } + } +#endif +#if defined(HAS_INTERPOLATEROW_RVV) + if 
(TestCpuFlag(kCpuHasRVV)) { + InterpolateRow = InterpolateRow_RVV; + } +#endif + + void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb, + int dst_width, int x, int dx) = + filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; + if (src_width >= 32768) { + ScaleARGBFilterCols = + filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C; + } +#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) + if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { + ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; + } +#endif +#if defined(HAS_SCALEARGBFILTERCOLS_NEON) + if (filtering && TestCpuFlag(kCpuHasNEON)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; + } + } +#endif +#if defined(HAS_SCALEARGBFILTERCOLS_LSX) + if (filtering && TestCpuFlag(kCpuHasLSX)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX; + if (IS_ALIGNED(dst_width, 8)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_LSX; + } + } +#endif +#if defined(HAS_SCALEARGBFILTERCOLS_RVV) + if (filtering && TestCpuFlag(kCpuHasRVV)) { + ScaleARGBFilterCols = ScaleARGBFilterCols_RVV; + } +#endif +#if defined(HAS_SCALEARGBCOLS_SSE2) + if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { + ScaleARGBFilterCols = ScaleARGBCols_SSE2; + } +#endif +#if defined(HAS_SCALEARGBCOLS_NEON) + if (!filtering && TestCpuFlag(kCpuHasNEON)) { + ScaleARGBFilterCols = ScaleARGBCols_Any_NEON; + if (IS_ALIGNED(dst_width, 8)) { + ScaleARGBFilterCols = ScaleARGBCols_NEON; + } + } +#endif +#if defined(HAS_SCALEARGBCOLS_LSX) + if (!filtering && TestCpuFlag(kCpuHasLSX)) { + ScaleARGBFilterCols = ScaleARGBCols_Any_LSX; + if (IS_ALIGNED(dst_width, 4)) { + ScaleARGBFilterCols = ScaleARGBCols_LSX; + } + } +#endif + if (!filtering && src_width * 2 == dst_width && x < 0x8000) { + ScaleARGBFilterCols = ScaleARGBColsUp2_C; +#if defined(HAS_SCALEARGBCOLSUP2_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { + 
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; + } +#endif + } + + const int max_y = (src_height - 1) << 16; + if (y > max_y) { + y = max_y; + } + const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. + int yi = y >> 16; + int uv_yi = yi >> kYShift; + const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y; + const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; + const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; + + // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB + // scaled horizontally to the destination width. + const int row_size = (dst_width * 4 + 31) & ~31; + align_buffer_64(row, row_size * 2 + src_width * 4); + + uint8_t* argb_row = row + row_size * 2; + uint8_t* rowptr = row; + int rowstride = row_size; + int lasty = yi; + if (!row) + return 1; + + // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. + ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); + if (src_height > 1) { + src_row_y += src_stride_y; + if (yi & 1) { + src_row_u += src_stride_u; + src_row_v += src_stride_v; + } + } + ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx); + if (src_height > 2) { + src_row_y += src_stride_y; + if (!(yi & 1)) { + src_row_u += src_stride_u; + src_row_v += src_stride_v; + } + } + + for (j = 0; j < dst_height; ++j) { + yi = y >> 16; + if (yi != lasty) { + if (y > max_y) { + y = max_y; + yi = y >> 16; + uv_yi = yi >> kYShift; + src_row_y = src_y + yi * (intptr_t)src_stride_y; + src_row_u = src_u + uv_yi * (intptr_t)src_stride_u; + src_row_v = src_v + uv_yi * (intptr_t)src_stride_v; + } + if (yi != lasty) { + // TODO(fbarchard): Convert the clipped region of row. 
+ I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width); + ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx); + rowptr += rowstride; + rowstride = -rowstride; + lasty = yi; + src_row_y += src_stride_y; + if (yi & 1) { + src_row_u += src_stride_u; + src_row_v += src_stride_v; + } + } + } + if (filtering == kFilterLinear) { + InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); + } else { + int yf = (y >> 8) & 255; + InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); + } + dst_argb += dst_stride_argb; + y += dy; + } + free_aligned_buffer_64(row); + return 0; +} +#endif + // Scale ARGB to/from any dimensions, without interpolation. // Fixed point math is used for performance: The upper 16 bits // of x and dx is the integer part of the source position and @@ -608,8 +901,8 @@ static void ScaleARGBSimple(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, @@ -652,8 +945,8 @@ static void ScaleARGBSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, - dx); + ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride, + dst_width, x, dx); dst_argb += dst_stride; y += dy; } @@ -688,7 +981,7 @@ static int ScaleARGB(const uint8_t* src, // Negative src_height means invert the image. 
if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (ptrdiff_t)src_stride; + src = src + (src_height - 1) * (intptr_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -703,8 +996,8 @@ static int ScaleARGB(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (ptrdiff_t)src_stride; - dst += clip_y * (ptrdiff_t)dst_stride; + src += (clipf >> 16) * (intptr_t)src_stride; + dst += clip_y * dst_stride; } // Special case for integer step values. @@ -737,7 +1030,7 @@ static int ScaleARGB(const uint8_t* src, filtering = kFilterNone; if (dx == 0x10000 && dy == 0x10000) { // Straight copy. - ARGBCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 4, + ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4, src_stride, dst, dst_stride, clip_width, clip_height); return 0; } @@ -779,9 +1072,9 @@ int ARGBScaleClip(const uint8_t* src_argb, int clip_width, int clip_height, enum FilterMode filtering) { - if (!src_argb || src_width == 0 || src_height == 0 || src_height == INT_MIN || - !dst_argb || dst_width <= 0 || dst_height <= 0 || clip_x < 0 || - clip_y < 0 || clip_width > 32768 || clip_height > 32768 || + if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb || + dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 || + clip_width > 32768 || clip_height > 32768 || (clip_x + clip_width) > dst_width || (clip_y + clip_height) > dst_height) { return -1; @@ -802,9 +1095,8 @@ int ARGBScale(const uint8_t* src_argb, int dst_width, int dst_height, enum FilterMode filtering) { - if (!src_argb || src_width == 0 || src_height == 0 || src_height == INT_MIN || - src_width > 32768 || src_height > 32768 || !dst_argb || dst_width <= 0 || - dst_height <= 0) { + if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 || + src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height 
<= 0) { return -1; } return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb, @@ -836,13 +1128,12 @@ int YUVToARGBScaleClip(const uint8_t* src_y, int r; (void)src_fourcc; // TODO(fbarchard): implement and/or assert. (void)dst_fourcc; + const int abs_src_height = (src_height < 0) ? -src_height : src_height; if (!src_y || !src_u || !src_v || !dst_argb || src_width <= 0 || - src_width > INT_MAX / 4 || src_height == 0 || src_height == INT_MIN || - dst_width <= 0 || dst_height <= 0 || clip_width <= 0 || - clip_height <= 0) { + src_width > INT_MAX / 4 || src_height == 0 || dst_width <= 0 || + dst_height <= 0 || clip_width <= 0 || clip_height <= 0) { return -1; } - const int abs_src_height = (src_height < 0) ? -src_height : src_height; const uint64_t argb_buffer_size = (uint64_t)src_width * abs_src_height * 4; if (argb_buffer_size > SIZE_MAX) { return -1; // Invalid size. diff --git a/source/scale_common.cc b/source/scale_common.cc index e2447119b..dff17e3ea 100644 --- a/source/scale_common.cc +++ b/source/scale_common.cc @@ -792,10 +792,10 @@ void ScaleFilterCols64_C(uint8_t* dst_ptr, #undef BLENDER // Same as 8 bit arm blender but return is cast to uint16_t -#define BLENDER(a, b, f) \ - (uint16_t)((int)(a) + \ - (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> \ - 16)) +#define BLENDER(a, b, f) \ + (uint16_t)( \ + (int)(a) + \ + (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16)) void ScaleFilterCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, @@ -1196,7 +1196,7 @@ void ScaleARGBColsUp2_C(uint8_t* dst_argb, // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607. 
// Mimics SSSE3 blender -#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 +#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7 #define BLENDERC(a, b, f, s) \ (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) #define BLENDER(a, b, f) \ @@ -1636,6 +1636,14 @@ void ScalePlaneVertical(int src_height, assert(dst_width > 0); assert(dst_height > 0); src_argb += (x >> 16) * bpp; +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(dst_width_bytes, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -1710,6 +1718,14 @@ void ScalePlaneVertical_16(int src_height, assert(dst_width > 0); assert(dst_height > 0); src_argb += (x >> 16) * wpp; +#if defined(HAS_INTERPOLATEROW_16_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + InterpolateRow = InterpolateRow_16_Any_SSE2; + if (IS_ALIGNED(dst_width_words, 16)) { + InterpolateRow = InterpolateRow_16_SSE2; + } + } +#endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_16_Any_SSSE3; diff --git a/source/scale_gcc.cc b/source/scale_gcc.cc index 773076669..5338482c5 100644 --- a/source/scale_gcc.cc +++ b/source/scale_gcc.cc @@ -1759,25 +1759,25 @@ void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr, void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { - asm volatile("pxor %%xmm5,%%xmm5 \n" + asm volatile("pxor %%xmm5,%%xmm5 \n" // 16 pixel loop. 
LABELALIGN - "1: \n" - "movdqu (%0),%%xmm3 \n" - "lea 0x10(%0),%0 \n" // src_ptr += 16 - "movdqu (%1),%%xmm0 \n" - "movdqu 0x10(%1),%%xmm1 \n" - "movdqa %%xmm3,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "paddusw %%xmm2,%%xmm0 \n" - "paddusw %%xmm3,%%xmm1 \n" - "movdqu %%xmm0,(%1) \n" - "movdqu %%xmm1,0x10(%1) \n" - "lea 0x20(%1),%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" + "1: \n" + "movdqu (%0),%%xmm3 \n" + "lea 0x10(%0),%0 \n" // src_ptr += 16 + "movdqu (%1),%%xmm0 \n" + "movdqu 0x10(%1),%%xmm1 \n" + "movdqa %%xmm3,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm2 \n" + "punpckhbw %%xmm5,%%xmm3 \n" + "paddusw %%xmm2,%%xmm0 \n" + "paddusw %%xmm3,%%xmm1 \n" + "movdqu %%xmm0,(%1) \n" + "movdqu %%xmm1,0x10(%1) \n" + "lea 0x20(%1),%1 \n" + "sub $0x10,%2 \n" + "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(src_width) // %2 @@ -1790,23 +1790,23 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr, void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { - asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n" + asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN - "1: \n" - "vmovdqu (%0),%%ymm3 \n" - "lea 0x20(%0),%0 \n" // src_ptr += 32 - "vpermq $0xd8,%%ymm3,%%ymm3 \n" - "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" - "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" - "vpaddusw (%1),%%ymm2,%%ymm0 \n" - "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n" - "vmovdqu %%ymm0,(%1) \n" - "vmovdqu %%ymm1,0x20(%1) \n" - "lea 0x40(%1),%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - "vzeroupper \n" + "1: \n" + "vmovdqu (%0),%%ymm3 \n" + "lea 0x20(%0),%0 \n" // src_ptr += 32 + "vpermq $0xd8,%%ymm3,%%ymm3 \n" + "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" + "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" + "vpaddusw (%1),%%ymm2,%%ymm0 \n" + "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n" + "vmovdqu %%ymm0,(%1) \n" + "vmovdqu %%ymm1,0x20(%1) \n" + "lea 0x40(%1),%1 \n" + "sub $0x20,%2 \n" + "jg 1b \n" + "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(src_width) // %2 diff --git 
a/source/scale_rgb.cc b/source/scale_rgb.cc index 6040b364e..5e69fe379 100644 --- a/source/scale_rgb.cc +++ b/source/scale_rgb.cc @@ -42,8 +42,8 @@ int RGBScale(const uint8_t* src_rgb, enum FilterMode filtering) { int r; if (!src_rgb || !dst_rgb || src_width <= 0 || src_width > INT_MAX / 4 || - src_height == 0 || src_height == INT_MIN || dst_width <= 0 || - dst_width > INT_MAX / 4 || dst_height <= 0) { + src_height == 0 || dst_width <= 0 || dst_width > INT_MAX / 4 || + dst_height <= 0) { return -1; } const int abs_src_height = (src_height < 0) ? -src_height : src_height; diff --git a/source/scale_uv.cc b/source/scale_uv.cc index 43a464732..3d41a2398 100644 --- a/source/scale_uv.cc +++ b/source/scale_uv.cc @@ -11,7 +11,6 @@ #include "libyuv/scale_uv.h" #include -#include #include #include "libyuv/cpu_id.h" @@ -60,8 +59,8 @@ static void ScaleUVDown2(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -70,7 +69,7 @@ static void ScaleUVDown2(int src_width, int dy, enum FilterMode filtering) { int j; - ptrdiff_t row_stride = src_stride * (dy >> 16); + int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = filtering == kFilterNone @@ -84,9 +83,9 @@ static void ScaleUVDown2(int src_width, assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. // Advance to odd row, even column. 
if (filtering == kFilterBilinear) { - src_uv += (y >> 16) * src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; } else { - src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2; } #if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3) @@ -175,8 +174,8 @@ static int ScaleUVDown4Box(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -189,12 +188,12 @@ static int ScaleUVDown4Box(int src_width, align_buffer_64(row, row_size * 2); if (!row) return 1; - ptrdiff_t row_stride = src_stride * (dy >> 16); + int row_stride = src_stride * (dy >> 16); void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) = ScaleUVRowDown2Box_C; // Advance to odd row, even column. - src_uv += (y >> 16) * src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; (void)src_width; (void)src_height; (void)dx; @@ -257,8 +256,8 @@ static void ScaleUVDownEven(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -268,7 +267,7 @@ static void ScaleUVDownEven(int src_width, enum FilterMode filtering) { int j; int col_step = dx >> 16; - ptrdiff_t row_stride = (dy >> 16) * src_stride; + ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride); void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride, int src_step, uint8_t* dst_uv, int dst_width) = filtering ? 
ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C; @@ -276,7 +275,7 @@ static void ScaleUVDownEven(int src_width, (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); - src_uv += (y >> 16) * src_stride + (x >> 16) * 2; + src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2; #if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3 @@ -335,8 +334,8 @@ static int ScaleUVBilinearDown(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -364,6 +363,14 @@ static int ScaleUVBilinearDown(int src_width, clip_src_width = (int)(xr - xl) * 2; // Width aligned to 2. src_uv += xl * 2; x -= (int)(xl << 16); +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(clip_src_width, 16)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -423,7 +430,7 @@ static int ScaleUVBilinearDown(int src_width, } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; - const uint8_t* src = src_uv + yi * src_stride; + const uint8_t* src = src_uv + yi * (intptr_t)src_stride; if (filtering == kFilterLinear) { ScaleUVFilterCols(dst_uv, src, dst_width, x, dx); } else { @@ -449,8 +456,8 @@ static int ScaleUVBilinearUp(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -466,6 +473,14 @@ static int ScaleUVBilinearUp(int src_width, int dst_width, int x, int dx) = filtering ? 
ScaleUVFilterCols_C : ScaleUVCols_C; const int max_y = (src_height - 1) << 16; +#if defined(HAS_INTERPOLATEROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + InterpolateRow = InterpolateRow_Any_SSSE3; + if (IS_ALIGNED(dst_width, 8)) { + InterpolateRow = InterpolateRow_SSSE3; + } + } +#endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; @@ -544,7 +559,7 @@ static int ScaleUVBilinearUp(int src_width, { int yi = y >> 16; - const uint8_t* src = src_uv + yi * src_stride; + const uint8_t* src = src_uv + yi * (intptr_t)src_stride; // Allocate 2 rows of UV. const int row_size = (dst_width * 2 + 15) & ~15; @@ -553,7 +568,7 @@ static int ScaleUVBilinearUp(int src_width, return 1; uint8_t* rowptr = row; - ptrdiff_t rowstride = row_size; + int rowstride = row_size; int lasty = yi; ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -571,7 +586,7 @@ static int ScaleUVBilinearUp(int src_width, if (y > max_y) { y = max_y; yi = y >> 16; - src = src_uv + yi * src_stride; + src = src_uv + yi * (intptr_t)src_stride; } if (yi != lasty) { ScaleUVFilterCols(rowptr, src, dst_width, x, dx); @@ -607,8 +622,8 @@ static void ScaleUVLinearUp2(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv) { void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) = @@ -646,12 +661,13 @@ static void ScaleUVLinearUp2(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, + dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; 
y += dy; } @@ -727,8 +743,8 @@ static void ScaleUVLinearUp2_16(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint16_t* src_uv, uint16_t* dst_uv) { void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) = @@ -760,12 +776,13 @@ static void ScaleUVLinearUp2_16(int src_width, #endif if (dst_height == 1) { - ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv, + dst_width); } else { dy = FixedDiv(src_height - 1, dst_height - 1); y = (1 << 15) - 1; for (i = 0; i < dst_height; ++i) { - ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width); + ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width); dst_uv += dst_stride; y += dy; } @@ -835,8 +852,8 @@ static void ScaleUVSimple(int src_width, int src_height, int dst_width, int dst_height, - ptrdiff_t src_stride, - ptrdiff_t dst_stride, + int src_stride, + int dst_stride, const uint8_t* src_uv, uint8_t* dst_uv, int x, @@ -871,7 +888,8 @@ static void ScaleUVSimple(int src_width, } for (j = 0; j < dst_height; ++j) { - ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx); + ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x, + dx); dst_uv += dst_stride; y += dy; } @@ -885,13 +903,13 @@ static int UVCopy(const uint8_t* src_uv, int dst_stride_uv, int width, int height) { - if (!src_uv || !dst_uv || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -905,13 +923,13 @@ static int UVCopy_16(const uint16_t* src_uv, int dst_stride_uv, int width, int height) { - if (!src_uv || !dst_uv || width <= 0 || height == 0 || height == INT_MIN) { + if (!src_uv || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv; + src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } @@ -949,7 +967,7 @@ static int ScaleUV(const uint8_t* src, // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; - src = src + (src_height - 1) * (ptrdiff_t)src_stride; + src = src + (src_height - 1) * (intptr_t)src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, @@ -964,8 +982,8 @@ static int ScaleUV(const uint8_t* src, if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); - src += (clipf >> 16) * (ptrdiff_t)src_stride; - dst += clip_y * (ptrdiff_t)dst_stride; + src += (clipf >> 16) * (intptr_t)src_stride; + dst += clip_y * dst_stride; } // Special case for integer step values. @@ -1005,8 +1023,9 @@ static int ScaleUV(const uint8_t* src, #ifdef HAS_UVCOPY if (dx == 0x10000 && dy == 0x10000) { // Straight copy. 
- return UVCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 2, - src_stride, dst, dst_stride, clip_width, clip_height); + UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2, + src_stride, dst, dst_stride, clip_width, clip_height); + return 0; } #endif } @@ -1062,8 +1081,7 @@ int UVScale(const uint8_t* src_uv, int dst_height, enum FilterMode filtering) { if (!src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 || - src_height < -32768 || src_height > 32768 || !dst_uv || dst_width <= 0 || - dst_height <= 0) { + src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) { return -1; } return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, @@ -1085,9 +1103,8 @@ int UVScale_16(const uint16_t* src_uv, enum FilterMode filtering) { int dy = 0; - if (!src_uv || src_width <= 0 || src_height == 0 || src_height == INT_MIN || - src_width > 32768 || src_height > 32768 || !dst_uv || dst_width <= 0 || - dst_height <= 0) { + if (!src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 || + src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) { return -1; } @@ -1099,7 +1116,7 @@ int UVScale_16(const uint16_t* src_uv, // Negative src_height means invert the image. 
if (src_height < 0) { src_height = -src_height; - src_uv = src_uv + (src_height - 1) * (ptrdiff_t)src_stride_uv; + src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv; src_stride_uv = -src_stride_uv; } src_width = Abs(src_width); @@ -1107,17 +1124,16 @@ int UVScale_16(const uint16_t* src_uv, #ifdef HAS_UVCOPY if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) { if (dst_height == 1) { - return UVCopy_16( - src_uv + ((src_height - 1) / 2) * (ptrdiff_t)src_stride_uv, - src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height); + UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv, + src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height); + } else { + dy = src_height / dst_height; + UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv, + (int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv, + dst_width, dst_height); } - dy = src_height / dst_height; - if (src_stride_uv > INT_MAX / dy) { - return -1; - } - return UVCopy_16(src_uv + ((dy - 1) / 2) * (ptrdiff_t)src_stride_uv, - dy * src_stride_uv, dst_uv, dst_stride_uv, dst_width, - dst_height); + + return 0; } #endif diff --git a/source/scale_win.cc b/source/scale_win.cc index 4b7fd3590..870ed77b3 100644 --- a/source/scale_win.cc +++ b/source/scale_win.cc @@ -104,7 +104,7 @@ __declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] - psrlw xmm0, 8 // isolate odd pixels. + psrlw xmm0, 8 // isolate odd pixels. psrlw xmm1, 8 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -138,7 +138,7 @@ __declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, lea eax, [eax + 32] pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 - pavgw xmm0, xmm5 // (x + 1) / 2 + pavgw xmm0, xmm5 // (x + 1) / 2 pavgw xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 @@ -213,7 +213,7 @@ __declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr, vpsrlw ymm0, ymm0, 8 // isolate odd pixels. 
vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 @@ -249,7 +249,7 @@ __declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 vpavgw ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 @@ -319,7 +319,7 @@ __declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000 + pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000 psrld xmm5, 24 pslld xmm5, 16 @@ -424,7 +424,7 @@ __declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr, vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vpsrlw ymm0, ymm0, 8 vpackuswb ymm0, ymm0, ymm0 - vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb + vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 @@ -687,7 +687,7 @@ __declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, pshufb xmm1, xmm5 paddusb xmm0, xmm1 - movq qword ptr [edx], xmm0 // write 12 pixels + movq qword ptr [edx], xmm0 // write 12 pixels movhlps xmm1, xmm0 movd [edx + 8], xmm1 lea edx, [edx + 12] @@ -1030,7 +1030,7 @@ __declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, lea eax, [eax + 32] movdqa xmm2, xmm0 shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels + shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] @@ -1216,7 +1216,7 @@ __declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb, test ecx, 2 je xloop29 - // 2 Pixels. + // 2 Pixels. 
movd xmm0, [esi + eax * 4] // 1 source x0 pixels movd xmm1, [esi + edx * 4] // 1 source x1 pixels pextrw eax, xmm2, 5 // get x2 integer. @@ -1229,7 +1229,7 @@ __declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb, test ecx, 1 je xloop99 - // 1 Pixels. + // 1 Pixels. movd xmm0, [esi + eax * 4] // 1 source x2 pixels movd dword ptr [edi], xmm0 xloop99: diff --git a/unit_test/basictypes_test.cc b/unit_test/basictypes_test.cc index 421b56f85..9aaa2dcd9 100644 --- a/unit_test/basictypes_test.cc +++ b/unit_test/basictypes_test.cc @@ -22,22 +22,22 @@ TEST_F(LibYUVBaseTest, SizeOfTypes) { uint32_t u32 = 1u; int64_t i64 = -1; uint64_t u64 = 1u; - ASSERT_EQ(1u, sizeof(i8)); - ASSERT_EQ(1u, sizeof(u8)); - ASSERT_EQ(2u, sizeof(i16)); - ASSERT_EQ(2u, sizeof(u16)); - ASSERT_EQ(4u, sizeof(i32)); - ASSERT_EQ(4u, sizeof(u32)); - ASSERT_EQ(8u, sizeof(i64)); - ASSERT_EQ(8u, sizeof(u64)); - ASSERT_GT(0, i8); - ASSERT_LT(0u, u8); - ASSERT_GT(0, i16); - ASSERT_LT(0u, u16); - ASSERT_GT(0, i32); - ASSERT_LT(0u, u32); - ASSERT_GT(0, i64); - ASSERT_LT(0u, u64); + EXPECT_EQ(1u, sizeof(i8)); + EXPECT_EQ(1u, sizeof(u8)); + EXPECT_EQ(2u, sizeof(i16)); + EXPECT_EQ(2u, sizeof(u16)); + EXPECT_EQ(4u, sizeof(i32)); + EXPECT_EQ(4u, sizeof(u32)); + EXPECT_EQ(8u, sizeof(i64)); + EXPECT_EQ(8u, sizeof(u64)); + EXPECT_GT(0, i8); + EXPECT_LT(0u, u8); + EXPECT_GT(0, i16); + EXPECT_LT(0u, u16); + EXPECT_GT(0, i32); + EXPECT_LT(0u, u32); + EXPECT_GT(0, i64); + EXPECT_LT(0u, u64); } } // namespace libyuv diff --git a/unit_test/color_test.cc b/unit_test/color_test.cc index 24456a524..01267ff1e 100644 --- a/unit_test/color_test.cc +++ b/unit_test/color_test.cc @@ -22,8 +22,14 @@ namespace libyuv { // TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB. 
// Port to Visual C and other CPUs +#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \ + (defined(__x86_64__) || defined(__i386__)) +#define ERROR_FULL 5 +#define ERROR_J420 4 +#else #define ERROR_FULL 6 #define ERROR_J420 6 +#endif #define ERROR_R 1 #define ERROR_G 1 #ifdef LIBYUV_UNLIMITED_DATA @@ -113,11 +119,11 @@ namespace libyuv { } \ /* Test C and SIMD match. */ \ for (int i = 0; i < kPixels * 4; ++i) { \ - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ } \ /* Test SIMD is close to original. */ \ for (int i = 0; i < kPixels * 4; ++i) { \ - ASSERT_NEAR(static_cast(orig_pixels[i]), \ + EXPECT_NEAR(static_cast(orig_pixels[i]), \ static_cast(dst_pixels_opt[i]), DIFF); \ } \ \ @@ -425,16 +431,15 @@ TEST_F(LibYUVColorTest, TestRoundToByte) { allb |= b; } } - ASSERT_GE(allb, 0); - ASSERT_LE(allb, 255); + EXPECT_GE(allb, 0); + EXPECT_LE(allb, 255); } // BT.601 limited range YUV to RGB reference static void YUVToRGBReference(int y, int u, int v, int* r, int* g, int* b) { - double y1 = (y - 16) * 1.164; - *r = RoundToByte(y1 - (v - 128) * -1.596); - *g = RoundToByte(y1 - (u - 128) * 0.391 - (v - 128) * 0.813); - *b = RoundToByte(y1 - (u - 128) * -2.018); + *r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.596); + *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.391 - (v - 128) * 0.813); + *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.018); } // BT.601 full range YUV to RGB reference (aka JPEG) @@ -447,10 +452,9 @@ static void YUVJToRGBReference(int y, int u, int v, int* r, int* g, int* b) { // BT.709 limited range YUV to RGB reference // See also http://www.equasys.de/colorconversion.html static void YUVHToRGBReference(int y, int u, int v, int* r, int* g, int* b) { - double y1 = (y - 16) * 1.164; - *r = RoundToByte(y1 - (v - 128) * -1.793); - *g = RoundToByte(y1 - (u - 128) * 0.213 - (v - 128) * 0.533); - *b = RoundToByte(y1 - (u - 128) * -2.112); + *r = RoundToByte((y - 16) * 1.164 - (v - 
128) * -1.793); + *g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.213 - (v - 128) * 0.533); + *b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.112); } // BT.709 full range YUV to RGB reference @@ -462,10 +466,10 @@ static void YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) { // BT.2020 limited range YUV to RGB reference static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) { - double y1 = (y - 16) * 1.164384; - *r = RoundToByte(y1 - (v - 128) * -1.67867); - *g = RoundToByte(y1 - (u - 128) * 0.187326 - (v - 128) * 0.65042); - *b = RoundToByte(y1 - (u - 128) * -2.14177); + *r = RoundToByte((y - 16) * 1.164384 - (v - 128) * -1.67867); + *g = RoundToByte((y - 16) * 1.164384 - (u - 128) * 0.187326 - + (v - 128) * 0.65042); + *b = RoundToByte((y - 16) * 1.164384 - (u - 128) * -2.14177); } // BT.2020 full range YUV to RGB reference @@ -480,48 +484,48 @@ TEST_F(LibYUVColorTest, TestYUV) { // cyan (less red) YUVToRGBReference(240, 255, 0, &r0, &g0, &b0); - ASSERT_EQ(56, r0); - ASSERT_EQ(255, g0); - ASSERT_EQ(255, b0); + EXPECT_EQ(56, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); YUVToRGB(240, 255, 0, &r1, &g1, &b1); - ASSERT_EQ(57, r1); - ASSERT_EQ(255, g1); - ASSERT_EQ(255, b1); + EXPECT_EQ(57, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); // green (less red and blue) YUVToRGBReference(240, 0, 0, &r0, &g0, &b0); - ASSERT_EQ(56, r0); - ASSERT_EQ(255, g0); - ASSERT_EQ(2, b0); + EXPECT_EQ(56, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(2, b0); YUVToRGB(240, 0, 0, &r1, &g1, &b1); - ASSERT_EQ(57, r1); - ASSERT_EQ(255, g1); + EXPECT_EQ(57, r1); + EXPECT_EQ(255, g1); #ifdef LIBYUV_UNLIMITED_DATA - ASSERT_EQ(3, b1); + EXPECT_EQ(3, b1); #else - ASSERT_EQ(5, b1); + EXPECT_EQ(5, b1); #endif for (int i = 0; i < 256; ++i) { YUVToRGBReference(i, 128, 128, &r0, &g0, &b0); YUVToRGB(i, 128, 128, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + 
EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); YUVToRGBReference(i, 0, 0, &r0, &g0, &b0); YUVToRGB(i, 0, 0, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); YUVToRGBReference(i, 0, 255, &r0, &g0, &b0); YUVToRGB(i, 0, 255, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); } } @@ -530,47 +534,47 @@ TEST_F(LibYUVColorTest, TestGreyYUV) { // black YUVToRGBReference(16, 128, 128, &r0, &g0, &b0); - ASSERT_EQ(0, r0); - ASSERT_EQ(0, g0); - ASSERT_EQ(0, b0); + EXPECT_EQ(0, r0); + EXPECT_EQ(0, g0); + EXPECT_EQ(0, b0); YUVToRGB(16, 128, 128, &r1, &g1, &b1); - ASSERT_EQ(0, r1); - ASSERT_EQ(0, g1); - ASSERT_EQ(0, b1); + EXPECT_EQ(0, r1); + EXPECT_EQ(0, g1); + EXPECT_EQ(0, b1); // white YUVToRGBReference(240, 128, 128, &r0, &g0, &b0); - ASSERT_EQ(255, r0); - ASSERT_EQ(255, g0); - ASSERT_EQ(255, b0); + EXPECT_EQ(255, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); YUVToRGB(240, 128, 128, &r1, &g1, &b1); - ASSERT_EQ(255, r1); - ASSERT_EQ(255, g1); - ASSERT_EQ(255, b1); + EXPECT_EQ(255, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); // grey YUVToRGBReference(128, 128, 128, &r0, &g0, &b0); - ASSERT_EQ(130, r0); - ASSERT_EQ(130, g0); - ASSERT_EQ(130, b0); + EXPECT_EQ(130, r0); + EXPECT_EQ(130, g0); + EXPECT_EQ(130, b0); YUVToRGB(128, 128, 128, &r1, &g1, &b1); - ASSERT_EQ(130, r1); - ASSERT_EQ(130, g1); - ASSERT_EQ(130, b1); + EXPECT_EQ(130, r1); + EXPECT_EQ(130, g1); + EXPECT_EQ(130, b1); for (int y = 0; y < 256; ++y) { YUVToRGBReference(y, 128, 128, &r0, &g0, &b0); YUVToRGB(y, 128, 128, &r1, &g1, &b1); YToRGB(y, &r2, &g2, &b2); - ASSERT_EQ(r0, r1); - ASSERT_EQ(g0, g1); - ASSERT_EQ(b0, b1); - ASSERT_EQ(r0, r2); - ASSERT_EQ(g0, g2); - ASSERT_EQ(b0, b2); + 
EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); + EXPECT_EQ(r0, r2); + EXPECT_EQ(g0, g2); + EXPECT_EQ(b0, b2); } } @@ -608,11 +612,10 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) { #ifdef DISABLE_SLOW_TESTS #define FASTSTEP 5 #else -#define FASTSTEP 3 +#define FASTSTEP 1 #endif // BT.601 limited range. -#ifndef DISABLE_SLOW_TESTS TEST_F(LibYUVColorTest, TestFullYUV) { int rh[256] = { 0, @@ -623,16 +626,16 @@ TEST_F(LibYUVColorTest, TestFullYUV) { int bh[256] = { 0, }; - for (int u = 0; u < 256; u += FASTSTEP) { - for (int v = 0; v < 256; v += FASTSTEP) { + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVToRGBReference(y, u, v, &r0, &g0, &b0); YUVToRGB(y, u, v, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); ++rh[r1 - r0 + 128]; ++gh[g1 - g0 + 128]; ++bh[b1 - b0 + 128]; @@ -653,16 +656,16 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) { int bh[256] = { 0, }; - for (int u = 0; u < 256; u += FASTSTEP) { - for (int v = 0; v < 256; v += FASTSTEP) { + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVJToRGBReference(y, u, v, &r0, &g0, &b0); YUVJToRGB(y, u, v, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); ++rh[r1 - r0 + 128]; ++gh[g1 - g0 + 128]; ++bh[b1 - b0 + 128]; @@ -683,16 +686,16 @@ TEST_F(LibYUVColorTest, TestFullYUVH) { int bh[256] = { 0, }; - for (int u = 0; u < 256; u += FASTSTEP) { - for (int v = 0; v < 256; v += FASTSTEP) { + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; 
++v) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVHToRGBReference(y, u, v, &r0, &g0, &b0); YUVHToRGB(y, u, v, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); ++rh[r1 - r0 + 128]; ++gh[g1 - g0 + 128]; ++bh[b1 - b0 + 128]; @@ -713,16 +716,16 @@ TEST_F(LibYUVColorTest, TestFullYUVF) { int bh[256] = { 0, }; - for (int u = 0; u < 256; u += FASTSTEP) { - for (int v = 0; v < 256; v += FASTSTEP) { + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVFToRGBReference(y, u, v, &r0, &g0, &b0); YUVFToRGB(y, u, v, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); ++rh[r1 - r0 + 128]; ++gh[g1 - g0 + 128]; ++bh[b1 - b0 + 128]; @@ -743,16 +746,16 @@ TEST_F(LibYUVColorTest, TestFullYUVU) { int bh[256] = { 0, }; - for (int u = 0; u < 256; u += FASTSTEP) { - for (int v = 0; v < 256; v += FASTSTEP) { + for (int u = 0; u < 256; ++u) { + for (int v = 0; v < 256; ++v) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVUToRGBReference(y, u, v, &r0, &g0, &b0); YUVUToRGB(y, u, v, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, ERROR_G); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, ERROR_G); + EXPECT_NEAR(b0, b1, ERROR_B); ++rh[r1 - r0 + 128]; ++gh[g1 - g0 + 128]; ++bh[b1 - b0 + 128]; @@ -773,16 +776,16 @@ TEST_F(LibYUVColorTest, TestFullYUVV) { int bh[256] = { 0, }; - for (int u = 0; u < 256; u += FASTSTEP) { - for (int v = 0; v < 256; v += FASTSTEP) { + for (int u = 0; u < 256; ++u) { + for (int v = 
0; v < 256; ++v) { for (int y2 = 0; y2 < 256; y2 += FASTSTEP) { int r0, g0, b0, r1, g1, b1; int y = RANDOM256(y2); YUVVToRGBReference(y, u, v, &r0, &g0, &b0); YUVVToRGB(y, u, v, &r1, &g1, &b1); - ASSERT_NEAR(r0, r1, ERROR_R); - ASSERT_NEAR(g0, g1, 2); - ASSERT_NEAR(b0, b1, ERROR_B); + EXPECT_NEAR(r0, r1, ERROR_R); + EXPECT_NEAR(g0, g1, 2); + EXPECT_NEAR(b0, b1, ERROR_B); ++rh[r1 - r0 + 128]; ++gh[g1 - g0 + 128]; ++bh[b1 - b0 + 128]; @@ -791,8 +794,6 @@ TEST_F(LibYUVColorTest, TestFullYUVV) { } PrintHistogram(rh, gh, bh); } -#endif // DISABLE_SLOW_TESTS - #undef FASTSTEP TEST_F(LibYUVColorTest, TestGreyYUVJ) { @@ -800,47 +801,47 @@ TEST_F(LibYUVColorTest, TestGreyYUVJ) { // black YUVJToRGBReference(0, 128, 128, &r0, &g0, &b0); - ASSERT_EQ(0, r0); - ASSERT_EQ(0, g0); - ASSERT_EQ(0, b0); + EXPECT_EQ(0, r0); + EXPECT_EQ(0, g0); + EXPECT_EQ(0, b0); YUVJToRGB(0, 128, 128, &r1, &g1, &b1); - ASSERT_EQ(0, r1); - ASSERT_EQ(0, g1); - ASSERT_EQ(0, b1); + EXPECT_EQ(0, r1); + EXPECT_EQ(0, g1); + EXPECT_EQ(0, b1); // white YUVJToRGBReference(255, 128, 128, &r0, &g0, &b0); - ASSERT_EQ(255, r0); - ASSERT_EQ(255, g0); - ASSERT_EQ(255, b0); + EXPECT_EQ(255, r0); + EXPECT_EQ(255, g0); + EXPECT_EQ(255, b0); YUVJToRGB(255, 128, 128, &r1, &g1, &b1); - ASSERT_EQ(255, r1); - ASSERT_EQ(255, g1); - ASSERT_EQ(255, b1); + EXPECT_EQ(255, r1); + EXPECT_EQ(255, g1); + EXPECT_EQ(255, b1); // grey YUVJToRGBReference(128, 128, 128, &r0, &g0, &b0); - ASSERT_EQ(128, r0); - ASSERT_EQ(128, g0); - ASSERT_EQ(128, b0); + EXPECT_EQ(128, r0); + EXPECT_EQ(128, g0); + EXPECT_EQ(128, b0); YUVJToRGB(128, 128, 128, &r1, &g1, &b1); - ASSERT_EQ(128, r1); - ASSERT_EQ(128, g1); - ASSERT_EQ(128, b1); + EXPECT_EQ(128, r1); + EXPECT_EQ(128, g1); + EXPECT_EQ(128, b1); for (int y = 0; y < 256; ++y) { YUVJToRGBReference(y, 128, 128, &r0, &g0, &b0); YUVJToRGB(y, 128, 128, &r1, &g1, &b1); YJToRGB(y, &r2, &g2, &b2); - ASSERT_EQ(r0, r1); - ASSERT_EQ(g0, g1); - ASSERT_EQ(b0, b1); - ASSERT_EQ(r0, r2); - ASSERT_EQ(g0, g2); - 
ASSERT_EQ(b0, b2); + EXPECT_EQ(r0, r1); + EXPECT_EQ(g0, g1); + EXPECT_EQ(b0, b1); + EXPECT_EQ(r0, r2); + EXPECT_EQ(g0, g2); + EXPECT_EQ(b0, b2); } } diff --git a/unit_test/compare_test.cc b/unit_test/compare_test.cc index 8ae17f545..c29562cb8 100644 --- a/unit_test/compare_test.cc +++ b/unit_test/compare_test.cc @@ -48,7 +48,7 @@ TEST_F(LibYUVCompareTest, Djb2_Test) { " together with Hermann Zapf"; uint32_t foxhash = HashDjb2(reinterpret_cast(fox), 131, 5381); const uint32_t kExpectedFoxHash = 2611006483u; - ASSERT_EQ(kExpectedFoxHash, foxhash); + EXPECT_EQ(kExpectedFoxHash, foxhash); for (int i = 0; i < kMaxTest; ++i) { src_a[i] = (fastrand() & 0xff); @@ -57,13 +57,13 @@ TEST_F(LibYUVCompareTest, Djb2_Test) { // Compare different buffers. Expect hash is different. uint32_t h1 = HashDjb2(src_a, kMaxTest, 5381); uint32_t h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make last half same. Expect hash is different. memcpy(src_a + kMaxTest / 2, src_b + kMaxTest / 2, kMaxTest / 2); h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make first half same. Expect hash is different. memcpy(src_a + kMaxTest / 2, src_a, kMaxTest / 2); @@ -71,52 +71,52 @@ TEST_F(LibYUVCompareTest, Djb2_Test) { memcpy(src_a, src_b, kMaxTest / 2); h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make same. Expect hash is same. memcpy(src_a, src_b, kMaxTest); h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_EQ(h1, h2); + EXPECT_EQ(h1, h2); // Mask seed different. Expect hash is different. memcpy(src_a, src_b, kMaxTest); h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 1234); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make one byte different in middle. Expect hash is different. 
memcpy(src_a, src_b, kMaxTest); ++src_b[kMaxTest / 2]; h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make first byte different. Expect hash is different. memcpy(src_a, src_b, kMaxTest); ++src_b[0]; h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make last byte different. Expect hash is different. memcpy(src_a, src_b, kMaxTest); ++src_b[kMaxTest - 1]; h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_b, kMaxTest, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make a zeros. Test different lengths. Expect hash is different. memset(src_a, 0, kMaxTest); h1 = HashDjb2(src_a, kMaxTest, 5381); h2 = HashDjb2(src_a, kMaxTest / 2, 5381); - ASSERT_NE(h1, h2); + EXPECT_NE(h1, h2); // Make a zeros and seed of zero. Test different lengths. Expect hash is same. memset(src_a, 0, kMaxTest); h1 = HashDjb2(src_a, kMaxTest, 0); h2 = HashDjb2(src_a, kMaxTest / 2, 0); - ASSERT_EQ(h1, h2); + EXPECT_EQ(h1, h2); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -134,7 +134,7 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Opt) { for (int i = 0; i < benchmark_iterations_; ++i) { h1 = HashDjb2(src_a, kMaxTest, 5381); } - ASSERT_EQ(h1, h2); + EXPECT_EQ(h1, h2); free_aligned_buffer_page_end(src_a); } @@ -149,7 +149,7 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) { for (int i = 0; i < benchmark_iterations_; ++i) { h1 = HashDjb2(src_a + 1, kMaxTest, 5381); } - ASSERT_EQ(h1, h2); + EXPECT_EQ(h1, h2); free_aligned_buffer_page_end(src_a); } @@ -164,19 +164,19 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) { src_a[0] = 0; fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - ASSERT_EQ(static_cast(libyuv::FOURCC_BGRA), fourcc); + EXPECT_EQ(static_cast(libyuv::FOURCC_BGRA), fourcc); src_a[0] = 255; src_a[3] = 0; fourcc = ARGBDetect(src_a, benchmark_width_ * 4, 
benchmark_width_, benchmark_height_); - ASSERT_EQ(static_cast(libyuv::FOURCC_ARGB), fourcc); + EXPECT_EQ(static_cast(libyuv::FOURCC_ARGB), fourcc); src_a[3] = 255; for (int i = 0; i < benchmark_iterations_; ++i) { fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_, benchmark_height_); } - ASSERT_EQ(0u, fourcc); + EXPECT_EQ(0u, fourcc); free_aligned_buffer_page_end(src_a); } @@ -192,19 +192,19 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) { src_a[0 + 1] = 0; fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - ASSERT_EQ(static_cast(libyuv::FOURCC_BGRA), fourcc); + EXPECT_EQ(static_cast(libyuv::FOURCC_BGRA), fourcc); src_a[0 + 1] = 255; src_a[3 + 1] = 0; fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, benchmark_height_); - ASSERT_EQ(static_cast(libyuv::FOURCC_ARGB), fourcc); + EXPECT_EQ(static_cast(libyuv::FOURCC_ARGB), fourcc); src_a[3 + 1] = 255; for (int i = 0; i < benchmark_iterations_; ++i) { fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_, benchmark_height_); } - ASSERT_EQ(0u, fourcc); + EXPECT_EQ(0u, fourcc); free_aligned_buffer_page_end(src_a); } @@ -221,7 +221,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); uint32_t h1 = HammingDistance_C(src_a, src_b, 16); - ASSERT_EQ(16u, h1); + EXPECT_EQ(16u, h1); // Test C vs OPT on random buffer MemRandomize(src_a, kMaxWidth); @@ -263,7 +263,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) { h1 = HammingDistance_C(src_a, src_b, kMaxWidth); #endif } - ASSERT_EQ(h0, h1); + EXPECT_EQ(h0, h1); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -280,7 +280,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); uint32_t h1 = HammingDistance_C(src_a, src_b, 16); - ASSERT_EQ(16u, h1); + EXPECT_EQ(16u, 
h1); // Test C vs OPT on random buffer MemRandomize(src_a, kMaxWidth); @@ -295,7 +295,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) { h1 = HammingDistance_C(src_a, src_b, kMaxWidth); } - ASSERT_EQ(h0, h1); + EXPECT_EQ(h0, h1); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -311,7 +311,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); uint64_t h1 = ComputeHammingDistance(src_a, src_b, 16); - ASSERT_EQ(16u, h1); + EXPECT_EQ(16u, h1); // Test C vs OPT on random buffer MemRandomize(src_a, kMaxWidth); @@ -326,7 +326,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) { h1 = ComputeHammingDistance(src_a, src_b, kMaxWidth); } - ASSERT_EQ(h0, h1); + EXPECT_EQ(h0, h1); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -351,7 +351,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { memset(src_b, 0u, kMaxWidth); uint64_t h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth); - ASSERT_EQ(kMaxWidth * 8ULL, h0); + EXPECT_EQ(kMaxWidth * 8ULL, h0); for (int i = 0; i < benchmark_iterations_; ++i) { #if defined(HAS_HAMMINGDISTANCE_NEON) @@ -389,7 +389,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) { // result can not be expected to be correct. // TODO(fbarchard): Consider expecting the low 16 bits to match. 
if (kMaxWidth <= kMaxOptCount) { - ASSERT_EQ(kMaxWidth * 8U, h1); + EXPECT_EQ(kMaxWidth * 8U, h1); } else { if (kMaxWidth * 8ULL != static_cast(h1)) { printf( @@ -420,7 +420,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance) { h1 = ComputeHammingDistance(src_a, src_b, benchmark_width_ * benchmark_height_); } - ASSERT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1); + EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -436,7 +436,7 @@ TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) { memcpy(src_a, "test0123test4567", 16); memcpy(src_b, "tick0123tock4567", 16); uint64_t h1 = ComputeSumSquareError(src_a, src_b, 16); - ASSERT_EQ(790u, h1); + EXPECT_EQ(790u, h1); for (int i = 0; i < kMaxWidth; ++i) { src_a[i] = i; @@ -452,7 +452,7 @@ TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) { h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth); } - ASSERT_EQ(0u, h1); + EXPECT_EQ(0u, h1); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -468,18 +468,18 @@ TEST_F(LibYUVCompareTest, SumSquareError) { uint64_t err; err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - ASSERT_EQ(0u, err); + EXPECT_EQ(0u, err); memset(src_a, 1, kMaxWidth); err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - ASSERT_EQ(static_cast(err), kMaxWidth); + EXPECT_EQ(static_cast(err), kMaxWidth); memset(src_a, 190, kMaxWidth); memset(src_b, 193, kMaxWidth); err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - ASSERT_EQ(static_cast(err), kMaxWidth * 3 * 3); + EXPECT_EQ(static_cast(err), kMaxWidth * 3 * 3); for (int i = 0; i < kMaxWidth; ++i) { src_a[i] = (fastrand() & 0xff); @@ -492,7 +492,7 @@ TEST_F(LibYUVCompareTest, SumSquareError) { MaskCpuFlags(benchmark_cpu_info_); uint64_t opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth); - ASSERT_EQ(c_err, opt_err); + EXPECT_EQ(c_err, opt_err); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ 
-517,7 +517,7 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Opt) { opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); - ASSERT_EQ(0, 0); + EXPECT_EQ(0, 0); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -542,7 +542,7 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Unaligned) { opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6); - ASSERT_EQ(0, 0); + EXPECT_EQ(0, 0); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -564,7 +564,7 @@ TEST_F(LibYUVCompareTest, Psnr) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - ASSERT_EQ(err, kMaxPsnr); + EXPECT_EQ(err, kMaxPsnr); memset(src_a, 255, kSrcPlaneSize); @@ -572,7 +572,7 @@ TEST_F(LibYUVCompareTest, Psnr) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - ASSERT_EQ(err, 0.0); + EXPECT_EQ(err, 0.0); memset(src_a, 1, kSrcPlaneSize); @@ -580,8 +580,8 @@ TEST_F(LibYUVCompareTest, Psnr) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - ASSERT_GT(err, 48.0); - ASSERT_LT(err, 49.0); + EXPECT_GT(err, 48.0); + EXPECT_LT(err, 49.0); for (int i = 0; i < kSrcPlaneSize; ++i) { src_a[i] = i; @@ -591,9 +591,9 @@ TEST_F(LibYUVCompareTest, Psnr) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - ASSERT_GT(err, 2.0); + EXPECT_GT(err, 2.0); if (kSrcWidth * kSrcHeight >= 256) { - ASSERT_LT(err, 6.0); + EXPECT_LT(err, 6.0); } memset(src_a, 0, kSrcPlaneSize); @@ -619,7 +619,7 @@ TEST_F(LibYUVCompareTest, Psnr) { src_b + kSrcStride * b + b, kSrcStride, kSrcWidth, kSrcHeight); - ASSERT_EQ(opt_err, c_err); + EXPECT_EQ(opt_err, c_err); free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -644,7 +644,7 @@ TEST_F(LibYUVCompareTest, DISABLED_BenchmarkSsim_Opt) { opt_time = (get_time() - opt_time) / benchmark_iterations_; printf("BenchmarkSsim_Opt - %8.2f us opt\n", 
opt_time * 1e6); - ASSERT_EQ(0, 0); // Pass if we get this far. + EXPECT_EQ(0, 0); // Pass if we get this far. free_aligned_buffer_page_end(src_a); free_aligned_buffer_page_end(src_b); @@ -671,7 +671,7 @@ TEST_F(LibYUVCompareTest, Ssim) { kSrcHeight); if (kSrcWidth > 8 && kSrcHeight > 8) { - ASSERT_EQ(err, 1.0); + EXPECT_EQ(err, 1.0); } memset(src_a, 255, kSrcPlaneSize); @@ -681,7 +681,7 @@ TEST_F(LibYUVCompareTest, Ssim) { kSrcHeight); if (kSrcWidth > 8 && kSrcHeight > 8) { - ASSERT_LT(err, 0.0001); + EXPECT_LT(err, 0.0001); } memset(src_a, 1, kSrcPlaneSize); @@ -691,8 +691,8 @@ TEST_F(LibYUVCompareTest, Ssim) { kSrcHeight); if (kSrcWidth > 8 && kSrcHeight > 8) { - ASSERT_GT(err, 0.0001); - ASSERT_LT(err, 0.9); + EXPECT_GT(err, 0.0001); + EXPECT_LT(err, 0.9); } for (int i = 0; i < kSrcPlaneSize; ++i) { @@ -704,8 +704,8 @@ TEST_F(LibYUVCompareTest, Ssim) { kSrcHeight); if (kSrcWidth > 8 && kSrcHeight > 8) { - ASSERT_GT(err, 0.0); - ASSERT_LT(err, 0.01); + EXPECT_GT(err, 0.0); + EXPECT_LT(err, 0.01); } for (int i = b; i < (kSrcHeight + b); ++i) { @@ -729,7 +729,7 @@ TEST_F(LibYUVCompareTest, Ssim) { kSrcHeight); if (kSrcWidth > 8 && kSrcHeight > 8) { - ASSERT_EQ(opt_err, c_err); + EXPECT_EQ(opt_err, c_err); } free_aligned_buffer_page_end(src_a); diff --git a/unit_test/convert_argb_test.cc b/unit_test/convert_argb_test.cc index 7f545a435..e309b38bb 100644 --- a/unit_test/convert_argb_test.cc +++ b/unit_test/convert_argb_test.cc @@ -53,9 +53,9 @@ namespace libyuv { #define ABGRToABGR ARGBCopy // subsample amount uses a divide. 
-#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN)) +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) #define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \ SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \ @@ -82,19 +82,15 @@ namespace libyuv { (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, \ - kPaddedWidth * kPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ align_buffer_page_end( \ - src_uv, \ - kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC * 2 + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \ - align_buffer_page_end(dst_u_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_v_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \ - align_buffer_page_end(dst_u_opt, \ - kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_v_opt, \ - kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ SRC_T* src_y_p = reinterpret_cast(src_y + OFF); \ SRC_T* src_uv_p = reinterpret_cast(src_uv + OFF); \ for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { 
\ @@ -105,12 +101,12 @@ namespace libyuv { src_uv_p[i] = \ (fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ } \ - memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \ - memset(dst_u_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_v_c, 3, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \ - memset(dst_u_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_v_opt, 103, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \ @@ -128,11 +124,11 @@ namespace libyuv { NEG kHeight); \ } \ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - ASSERT_EQ(dst_y_c[i], dst_y_opt[i]); \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ - ASSERT_EQ(dst_u_c[i], dst_u_opt[i]); \ - ASSERT_EQ(dst_v_c[i], dst_v_opt[i]); \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ } \ free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_u_c); \ @@ -227,11 +223,11 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_u, kSizeUV + OFF); \ align_buffer_page_end(src_v, kSizeUV + OFF); \ - 
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ src_y[i + OFF] = (fastrand() & 0xff); \ } \ @@ -258,7 +254,7 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1) static_cast((time1 - time0) * 1e6), \ static_cast((time2 - time1) * 1e6 / benchmark_iterations_)); \ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -385,58 +381,58 @@ TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1) TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1) #endif -#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ - W1280, N, NEG, OFF) \ - TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ - const int kWidth = W1280; \ - const int kHeight = benchmark_height_; \ - const int kStrideB = kWidth * BPP_B; \ - const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ - align_buffer_page_end( \ - src_uv, kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \ - for (int i = 0; i < kHeight; ++i) \ - for (int j = 0; j < kWidth; ++j) \ - src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \ - for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ - for (int j = 0; j < kStrideUV * 2; ++j) { \ - src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \ - } \ - } \ - memset(dst_argb_c, 1, kStrideB * kHeight); \ - memset(dst_argb_opt, 101, kStrideB * kHeight); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_PLANAR##To##FMT_B(src_y + OFF, 
kWidth, src_uv + OFF, kStrideUV * 2, \ - dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - for (int i = 0; i < benchmark_iterations_; ++i) { \ - FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ - dst_argb_opt, kWidth * BPP_B, kWidth, \ - NEG kHeight); \ - } \ - /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ - align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \ - align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ - memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ - memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ - FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ - kHeight); \ - FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ - kHeight); \ - for (int i = 0; i < kHeight; ++i) { \ - for (int j = 0; j < kWidth * 4; ++j) { \ - ASSERT_EQ(dst_argb32_c[i * kWidth * 4 + j], \ - dst_argb32_opt[i * kWidth * 4 + j]); \ - } \ - } \ - free_aligned_buffer_page_end(src_y); \ - free_aligned_buffer_page_end(src_uv); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - free_aligned_buffer_page_end(dst_argb32_c); \ - free_aligned_buffer_page_end(dst_argb32_opt); \ +#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \ + W1280, N, NEG, OFF) \ + TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \ + const int kWidth = W1280; \ + const int kHeight = benchmark_height_; \ + const int kStrideB = kWidth * BPP_B; \ + const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ + align_buffer_page_end(src_uv, \ + kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \ + for (int i = 0; i < kHeight; ++i) \ + for (int j = 0; j < kWidth; ++j) \ + src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); 
\ + for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ + for (int j = 0; j < kStrideUV * 2; ++j) { \ + src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \ + } \ + } \ + memset(dst_argb_c, 1, kStrideB* kHeight); \ + memset(dst_argb_opt, 101, kStrideB* kHeight); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ + dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + for (int i = 0; i < benchmark_iterations_; ++i) { \ + FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \ + dst_argb_opt, kWidth * BPP_B, kWidth, \ + NEG kHeight); \ + } \ + /* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \ + align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \ + memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \ + memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \ + FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \ + kHeight); \ + FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \ + kHeight); \ + for (int i = 0; i < kHeight; ++i) { \ + for (int j = 0; j < kWidth * 4; ++j) { \ + EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j], \ + dst_argb32_opt[i * kWidth * 4 + j]); \ + } \ + } \ + free_aligned_buffer_page_end(src_y); \ + free_aligned_buffer_page_end(src_uv); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + free_aligned_buffer_page_end(dst_argb32_c); \ + free_aligned_buffer_page_end(dst_argb32_opt); \ } #if defined(ENABLE_FULL_TESTS) @@ -511,16 +507,15 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) const int kStrideB = \ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ align_buffer_page_end(src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_c, \ - kStrideB * kHeightB * (int)sizeof(TYPE_B)); \ + kStrideA* 
kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ align_buffer_page_end(dst_argb_opt, \ - kStrideB * kHeightB * (int)sizeof(TYPE_B)); \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ src_argb[i + OFF] = (fastrand() & 0xff); \ } \ - memset(dst_argb_c, 1, kStrideB * kHeightB); \ - memset(dst_argb_opt, 101, kStrideB * kHeightB); \ + memset(dst_argb_c, 1, kStrideB* kHeightB); \ + memset(dst_argb_opt, 101, kStrideB* kHeightB); \ MaskCpuFlags(disable_cpu_flags_); \ FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \ kStrideB, kWidth, NEG kHeight); \ @@ -530,49 +525,48 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2) (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ } \ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(dst_argb_c); \ free_aligned_buffer_page_end(dst_argb_opt); \ } -#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \ - TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \ - TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ - for (int times = 0; times < benchmark_iterations_; ++times) { \ - const int kWidth = (fastrand() & 63) + 1; \ - const int kHeight = (fastrand() & 31) + 1; \ - const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ - const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ - const int kStrideA = \ - (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ - const int kStrideB = \ - (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ - align_buffer_page_end(dst_argb_c, \ - kStrideB * kHeightB * (int)sizeof(TYPE_B)); \ - align_buffer_page_end(dst_argb_opt, \ - 
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \ - for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - src_argb[i] = 0xfe; \ - } \ - memset(dst_argb_c, 123, kStrideB * kHeightB); \ - memset(dst_argb_opt, 123, kStrideB * kHeightB); \ - MaskCpuFlags(disable_cpu_flags_); \ - FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \ - kStrideB, kWidth, kHeight); \ - MaskCpuFlags(benchmark_cpu_info_); \ - FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \ - kStrideB, kWidth, kHeight); \ - for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ - } \ - free_aligned_buffer_page_end(src_argb); \ - free_aligned_buffer_page_end(dst_argb_c); \ - free_aligned_buffer_page_end(dst_argb_opt); \ - } \ +#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \ + TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \ + TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \ + for (int times = 0; times < benchmark_iterations_; ++times) { \ + const int kWidth = (fastrand() & 63) + 1; \ + const int kHeight = (fastrand() & 31) + 1; \ + const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \ + const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \ + const int kStrideA = \ + (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ + const int kStrideB = \ + (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ + align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ + align_buffer_page_end(dst_argb_c, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + align_buffer_page_end(dst_argb_opt, \ + kStrideB* kHeightB*(int)sizeof(TYPE_B)); \ + for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ + src_argb[i] = 0xfe; \ + } \ + memset(dst_argb_c, 123, kStrideB* kHeightB); \ + memset(dst_argb_opt, 123, kStrideB* kHeightB); \ + MaskCpuFlags(disable_cpu_flags_); \ + FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, 
(TYPE_B*)dst_argb_c, \ + kStrideB, kWidth, kHeight); \ + MaskCpuFlags(benchmark_cpu_info_); \ + FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \ + kStrideB, kWidth, kHeight); \ + for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + } \ + free_aligned_buffer_page_end(src_argb); \ + free_aligned_buffer_page_end(dst_argb_c); \ + free_aligned_buffer_page_end(dst_argb_opt); \ + } \ } #if defined(ENABLE_FULL_TESTS) @@ -678,11 +672,11 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) const int kStrideB = \ (kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ align_buffer_page_end(src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ align_buffer_page_end(dst_argb_c, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ align_buffer_page_end(dst_argb_opt, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ src_argb[i + OFF] = (fastrand() & 0xff); \ } \ @@ -703,7 +697,7 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \ (TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \ for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(dst_argb_c); \ @@ -797,14 +791,14 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ const int kStrideB = \ (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, kStrideA * kHeightA + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * 
kHeightB); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \ for (int i = 0; i < kStrideA * kHeightA; ++i) { \ src_argb[i + OFF] = (fastrand() & 0xff); \ } \ - memset(dst_argb_c, 1, kStrideB * kHeightB); \ - memset(dst_argb_opt, 101, kStrideB * kHeightB); \ + memset(dst_argb_c, 1, kStrideB* kHeightB); \ + memset(dst_argb_opt, 101, kStrideB* kHeightB); \ MaskCpuFlags(disable_cpu_flags_); \ FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \ NULL, kWidth, NEG kHeight); \ @@ -814,7 +808,7 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) kStrideB, NULL, kWidth, NEG kHeight); \ } \ for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(dst_argb_c); \ @@ -833,14 +827,14 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ const int kStrideB = \ (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb, kStrideA * kHeightA); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \ + align_buffer_page_end(src_argb, kStrideA* kHeightA); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \ for (int i = 0; i < kStrideA * kHeightA; ++i) { \ src_argb[i] = (fastrand() & 0xff); \ } \ - memset(dst_argb_c, 123, kStrideB * kHeightB); \ - memset(dst_argb_opt, 123, kStrideB * kHeightB); \ + memset(dst_argb_c, 123, kStrideB* kHeightB); \ + memset(dst_argb_opt, 123, kStrideB* kHeightB); \ MaskCpuFlags(disable_cpu_flags_); \ FMT_A##To##FMT_B##Dither(src_argb, kStrideA, 
dst_argb_c, kStrideB, NULL, \ kWidth, kHeight); \ @@ -848,7 +842,7 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1) FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB, \ NULL, kWidth, kHeight); \ for (int i = 0; i < kStrideB * kHeightB; ++i) { \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(dst_argb_c); \ @@ -891,16 +885,15 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) const int kStrideA = \ (kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ align_buffer_page_end(src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_argb_c, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ align_buffer_page_end(dst_argb_opt, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A)); \ + kStrideA* kHeightA*(int)sizeof(TYPE_A)); \ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ src_argb[i + OFF] = (fastrand() & 0xff); \ } \ - memset(dst_argb_c, 1, kStrideA * kHeightA); \ - memset(dst_argb_opt, 101, kStrideA * kHeightA); \ + memset(dst_argb_c, 1, kStrideA* kHeightA); \ + memset(dst_argb_opt, 101, kStrideA* kHeightA); \ MaskCpuFlags(disable_cpu_flags_); \ FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \ kStrideA, kWidth, NEG kHeight); \ @@ -916,8 +909,8 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1) FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \ kWidth, NEG kHeight); \ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ - ASSERT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ 
free_aligned_buffer_page_end(dst_argb_c); \ @@ -952,12 +945,12 @@ TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_u, kSizeUV + OFF); \ align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(src_a, kWidth * kHeight + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ src_y[i + OFF] = (fastrand() & 0xff); \ src_a[i + OFF] = (fastrand() & 0xff); \ @@ -981,7 +974,7 @@ TESTEND(AB64ToAR64, uint16_t, 4, 4, 1) ATTEN); \ } \ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -1171,7 +1164,7 @@ TEST_F(LibYUVConvertTest, TestYToARGB) { argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]); } for (int i = 0; i < 32; ++i) { - ASSERT_EQ(expectedg[i], argb[i * 4 + 0]); + EXPECT_EQ(expectedg[i], argb[i * 4 + 0]); } } @@ -1193,7 +1186,7 @@ TEST_F(LibYUVConvertTest, TestNoDither) { benchmark_width_ * 2, kNoDither4x4, benchmark_width_, benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { - ASSERT_EQ(dst_rgb565[i], dst_rgb565dither[i]); + EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]); } free_aligned_buffer_page_end(src_argb); @@ -1230,7 +1223,7 @@ TEST_F(LibYUVConvertTest, TestDither) { 
benchmark_width_ * 4, benchmark_width_, benchmark_height_); for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { - ASSERT_NEAR(dst_argb[i], dst_argbdither[i], 9); + EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9); } free_aligned_buffer_page_end(src_argb); free_aligned_buffer_page_end(dst_rgb565); @@ -1247,11 +1240,11 @@ TEST_F(LibYUVConvertTest, TestDither) { const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_u, kSizeUV + OFF); \ align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ src_y[i + OFF] = (fastrand() & 0xff); \ } \ @@ -1272,16 +1265,16 @@ TEST_F(LibYUVConvertTest, TestDither) { dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \ } \ /* Convert to ARGB so 565 is expanded to bytes that can be compared. 
*/ \ - align_buffer_page_end(dst_argb32_c, kWidth * BPP_C * kHeight); \ - align_buffer_page_end(dst_argb32_opt, kWidth * BPP_C * kHeight); \ - memset(dst_argb32_c, 2, kWidth * BPP_C * kHeight); \ - memset(dst_argb32_opt, 102, kWidth * BPP_C * kHeight); \ + align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \ + align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \ + memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \ + memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \ FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \ kWidth, kHeight); \ FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \ kWidth * BPP_C, kWidth, kHeight); \ for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \ - ASSERT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \ + EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -1324,10 +1317,10 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_u, kSizeUV + OFF); \ align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ src_y[i + OFF] = (fastrand() & 0xff); \ } \ @@ -1341,8 +1334,8 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) kWidth, NEG kHeight); \ /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ const int kStrideC = kWidth * BPP_C; \ - align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \ - align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideC* 
kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ @@ -1354,7 +1347,7 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4) kStrideC, kWidth, kHeight); \ } \ for (int i = 0; i < kStrideC * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -1471,14 +1464,14 @@ TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ const int kSizeUV = \ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_u, kSizeUV + OFF); \ align_buffer_page_end(src_v, kSizeUV + OFF); \ - align_buffer_page_end(src_a, kWidth * kHeight + OFF); \ - align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ const int kStrideC = kWidth * BPP_C; \ - align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \ - align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ @@ -1506,7 +1499,7 @@ TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4) src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \ dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN); \ for (int i = 0; i < kStrideC * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + OFF], 
dst_argb_bc[i + OFF]); \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -1585,16 +1578,16 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) const int kHeight = benchmark_height_; \ const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \ - align_buffer_page_end(src_argb_a, kStrideA * kHeight + OFF); \ - align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \ MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \ memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \ FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \ kWidth, NEG kHeight); \ /* Convert to a 3rd format in 1 step and 2 steps and compare */ \ const int kStrideC = kWidth * BPP_C; \ - align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \ - align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \ + align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \ memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \ memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ @@ -1605,10 +1598,10 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4) kStrideC, kWidth, kHeight); \ } \ for (int i = 0; i < kStrideC * kHeight; i += 4) { \ - ASSERT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ - ASSERT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ - ASSERT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \ - ASSERT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \ + EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \ + EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + 
OFF + 2]); \ + EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \ } \ free_aligned_buffer_page_end(src_argb_a); \ free_aligned_buffer_page_end(dst_argb_b); \ @@ -1671,12 +1664,12 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) { 2, // crop height kRotate90, FOURCC_ARGB); - ASSERT_EQ(r, 0); + EXPECT_EQ(r, 0); // 90 degrees rotation, no conversion - ASSERT_EQ(dst[0], src[2]); - ASSERT_EQ(dst[1], src[0]); - ASSERT_EQ(dst[2], src[3]); - ASSERT_EQ(dst[3], src[1]); + EXPECT_EQ(dst[0], src[2]); + EXPECT_EQ(dst[1], src[0]); + EXPECT_EQ(dst[2], src[3]); + EXPECT_EQ(dst[3], src[1]); } #ifdef HAS_ARGBTOAR30ROW_AVX2 @@ -1704,7 +1697,7 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) { } } for (int i = 0; i < kPixels * 4; ++i) { - ASSERT_EQ(dst_opt[i], dst_c[i]); + EXPECT_EQ(dst_opt[i], dst_c[i]); } free_aligned_buffer_page_end(src); @@ -1738,7 +1731,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { } } for (int i = 0; i < kPixels * 4; ++i) { - ASSERT_EQ(dst_opt[i], dst_c[i]); + EXPECT_EQ(dst_opt[i], dst_c[i]); } free_aligned_buffer_page_end(src); @@ -1805,11 +1798,11 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth * kHeight * kBpc + SOFF); \ - align_buffer_page_end(src_u, kSizeUV * kBpc + SOFF); \ - align_buffer_page_end(src_v, kSizeUV * kBpc + SOFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight + DOFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + DOFF); \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ reinterpret_cast(src_y + SOFF)[i] = 
(fastrand() & FMT_MASK); \ } \ @@ -1834,7 +1827,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) { dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \ } \ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ + EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -1920,12 +1913,12 @@ TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth * kHeight * kBpc + OFF); \ - align_buffer_page_end(src_u, kSizeUV * kBpc + OFF); \ - align_buffer_page_end(src_v, kSizeUV * kBpc + OFF); \ - align_buffer_page_end(src_a, kWidth * kHeight * kBpc + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \ + align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ reinterpret_cast(src_y + OFF)[i] = \ (fastrand() & ((1 << S_DEPTH) - 1)); \ @@ -1957,7 +1950,7 @@ TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1) dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \ } \ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ + EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_u); \ @@ -2153,10 +2146,10 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) const int 
kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \ const int kBpc = 2; \ - align_buffer_page_end(src_y, kWidth * kHeight * kBpc + SOFF); \ - align_buffer_page_end(src_uv, kSizeUV * kBpc + SOFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight + DOFF); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + DOFF); \ + align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \ + align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \ for (int i = 0; i < kWidth * kHeight; ++i) { \ reinterpret_cast(src_y + SOFF)[i] = \ (fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \ @@ -2180,7 +2173,7 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10) NEG kHeight); \ } \ for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ + EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \ } \ free_aligned_buffer_page_end(src_y); \ free_aligned_buffer_page_end(src_uv); \ @@ -2323,10 +2316,10 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) { ++histogram_r[r]; // Reference formula for Y channel contribution in YUV to RGB conversions: int expected_y = Clamp(static_cast((i - 16) * 1.164f + 0.5f)); - ASSERT_EQ(b, expected_y); - ASSERT_EQ(g, expected_y); - ASSERT_EQ(r, expected_y); - ASSERT_EQ(a, 255); + EXPECT_EQ(b, expected_y); + EXPECT_EQ(g, expected_y); + EXPECT_EQ(r, expected_y); + EXPECT_EQ(a, 255); } int count_b = 0; @@ -2384,10 +2377,10 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) { ++histogram_g[g]; ++histogram_r[r]; int expected_y = Clamp(static_cast((i - 64) * 1.164f / 4)); - ASSERT_NEAR(b, expected_y, 1); - ASSERT_NEAR(g, expected_y, 1); - ASSERT_NEAR(r, expected_y, 1); - ASSERT_EQ(a, 255); + EXPECT_NEAR(b, expected_y, 1); + EXPECT_NEAR(g, expected_y, 1); + EXPECT_NEAR(r, expected_y, 1); + 
EXPECT_EQ(a, 255); } int count_b = 0; @@ -2448,10 +2441,10 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) { ++histogram_g[g10]; ++histogram_r[r10]; int expected_y = Clamp10(static_cast((i - 64) * 1.164f + 0.5)); - ASSERT_NEAR(b10, expected_y, 4); - ASSERT_NEAR(g10, expected_y, 4); - ASSERT_NEAR(r10, expected_y, 4); - ASSERT_EQ(a2, 3); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); } int count_b = 0; @@ -2512,10 +2505,10 @@ TEST_F(LibYUVConvertTest, TestH010ToAB30) { ++histogram_g[g10]; ++histogram_r[r10]; int expected_y = Clamp10(static_cast((i - 64) * 1.164f)); - ASSERT_NEAR(b10, expected_y, 4); - ASSERT_NEAR(g10, expected_y, 4); - ASSERT_NEAR(r10, expected_y, 4); - ASSERT_EQ(a2, 3); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); } int count_b = 0; @@ -2574,10 +2567,10 @@ TEST_F(LibYUVConvertTest, TestH420ToAR30) { ++histogram_g[g10]; ++histogram_r[r10]; int expected_y = Clamp10(static_cast((i - 16) * 1.164f * 4.f)); - ASSERT_NEAR(b10, expected_y, 4); - ASSERT_NEAR(g10, expected_y, 4); - ASSERT_NEAR(r10, expected_y, 4); - ASSERT_EQ(a2, 3); + EXPECT_NEAR(b10, expected_y, 4); + EXPECT_NEAR(g10, expected_y, 4); + EXPECT_NEAR(r10, expected_y, 4); + EXPECT_EQ(a2, 3); } int count_b = 0; @@ -2624,34 +2617,34 @@ TEST_F(LibYUVConvertTest, TestI400) { I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants, kSize, 1); - ASSERT_EQ(0, argb_pixels_i400[0]); - ASSERT_EQ(0, argb_pixels_j400[0]); - ASSERT_EQ(0, argb_pixels_jpeg_i400[0]); - ASSERT_EQ(0, argb_pixels_h709_i400[0]); - ASSERT_EQ(0, argb_pixels_2020_i400[0]); - ASSERT_EQ(0, argb_pixels_i400[16 * 4]); - ASSERT_EQ(16, argb_pixels_j400[16 * 4]); - ASSERT_EQ(16, argb_pixels_jpeg_i400[16 * 4]); - ASSERT_EQ(0, argb_pixels_h709_i400[16 * 4]); - ASSERT_EQ(0, argb_pixels_2020_i400[16 * 4]); - ASSERT_EQ(130, argb_pixels_i400[128 * 4]); - ASSERT_EQ(128, 
argb_pixels_j400[128 * 4]); - ASSERT_EQ(128, argb_pixels_jpeg_i400[128 * 4]); - ASSERT_EQ(130, argb_pixels_h709_i400[128 * 4]); - ASSERT_EQ(130, argb_pixels_2020_i400[128 * 4]); - ASSERT_EQ(255, argb_pixels_i400[255 * 4]); - ASSERT_EQ(255, argb_pixels_j400[255 * 4]); - ASSERT_EQ(255, argb_pixels_jpeg_i400[255 * 4]); - ASSERT_EQ(255, argb_pixels_h709_i400[255 * 4]); - ASSERT_EQ(255, argb_pixels_2020_i400[255 * 4]); + EXPECT_EQ(0, argb_pixels_i400[0]); + EXPECT_EQ(0, argb_pixels_j400[0]); + EXPECT_EQ(0, argb_pixels_jpeg_i400[0]); + EXPECT_EQ(0, argb_pixels_h709_i400[0]); + EXPECT_EQ(0, argb_pixels_2020_i400[0]); + EXPECT_EQ(0, argb_pixels_i400[16 * 4]); + EXPECT_EQ(16, argb_pixels_j400[16 * 4]); + EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]); + EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]); + EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]); + EXPECT_EQ(130, argb_pixels_i400[128 * 4]); + EXPECT_EQ(128, argb_pixels_j400[128 * 4]); + EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]); + EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]); + EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]); + EXPECT_EQ(255, argb_pixels_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_j400[255 * 4]); + EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]); + EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]); for (int i = 0; i < kSize * 4; ++i) { if ((i & 3) == 3) { - ASSERT_EQ(255, argb_pixels_j400[i]); + EXPECT_EQ(255, argb_pixels_j400[i]); } else { - ASSERT_EQ(i / 4, argb_pixels_j400[i]); + EXPECT_EQ(i / 4, argb_pixels_j400[i]); } - ASSERT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]); + EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]); } free_aligned_buffer_page_end(orig_i400); @@ -2678,7 +2671,7 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) { ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1); for (int i = 0; i < kSize * 3; ++i) { - ASSERT_EQ(orig_rgb24[i], dest_rgb24[i]); + EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]); } 
free_aligned_buffer_page_end(orig_rgb24); @@ -2697,7 +2690,7 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB565) { } ARGBToRGB565(&orig_pixels[0][0], 0, &dest_rgb565[0][0], 0, 256, 1); uint32_t checksum = HashDjb2(&dest_rgb565[0][0], sizeof(dest_rgb565), 5381); - ASSERT_EQ(610919429u, checksum); + EXPECT_EQ(610919429u, checksum); } TEST_F(LibYUVConvertTest, TestYUY2ToARGB) { @@ -2712,9 +2705,9 @@ TEST_F(LibYUVConvertTest, TestYUY2ToARGB) { YUY2ToARGB(&orig_pixels[0][0], 0, &dest_argb[0][0], 0, 256, 1); uint32_t checksum = HashDjb2(&dest_argb[0][0], sizeof(dest_argb), 5381); #if defined(LIBYUV_UNLIMITED_DATA) - ASSERT_EQ(10343289u, checksum); + EXPECT_EQ(10343289u, checksum); #else - ASSERT_EQ(3486643515u, checksum); + EXPECT_EQ(3486643515u, checksum); #endif } @@ -2730,9 +2723,9 @@ TEST_F(LibYUVConvertTest, TestUYVYToARGB) { UYVYToARGB(&orig_pixels[0][0], 0, &dest_argb[0][0], 0, 256, 1); uint32_t checksum = HashDjb2(&dest_argb[0][0], sizeof(dest_argb), 5381); #if defined(LIBYUV_UNLIMITED_DATA) - ASSERT_EQ(10343289u, checksum); + EXPECT_EQ(10343289u, checksum); #else - ASSERT_EQ(3486643515u, checksum); + EXPECT_EQ(3486643515u, checksum); #endif } @@ -2810,9 +2803,9 @@ TEST_F(LibYUVConvertTest, TestARGBToUVRow) { printf("\n"); uint32_t checksum_u = HashDjb2(&dest_u[0], sizeof(dest_u), 5381); - ASSERT_EQ(192508756u, checksum_u); + EXPECT_EQ(192508756u, checksum_u); uint32_t checksum_v = HashDjb2(&dest_v[0], sizeof(dest_v), 5381); - ASSERT_EQ(2590663990u, checksum_v); + EXPECT_EQ(2590663990u, checksum_v); } #endif @@ -2838,23 +2831,16 @@ TEST_F(LibYUVConvertTest, TestARGBToUVMatrixRow_Opt) { memset(dest_v_c, 0, sizeof(dest_v_c)); memset(dest_u_opt, 0, sizeof(dest_u_opt)); memset(dest_v_opt, 0, sizeof(dest_v_opt)); - + int src_stride = (height == 1) ? 
0 : kMaxWidth * 4; - ARGBToUVMatrixRow_C(&orig_argb_pixels[0], src_stride, &dest_u_c[0], - &dest_v_c[0], width, &kArgbI601Constants); - ARGBToUVMatrixRow_Any_NEON(&orig_argb_pixels[0], src_stride, - &dest_u_opt[0], &dest_v_opt[0], width, - &kArgbI601Constants); + ARGBToUVMatrixRow_C(&orig_argb_pixels[0], src_stride, &dest_u_c[0], &dest_v_c[0], width, &kArgbI601Constants); + ARGBToUVMatrixRow_Any_NEON(&orig_argb_pixels[0], src_stride, &dest_u_opt[0], &dest_v_opt[0], width, &kArgbI601Constants); int half_width = (width + 1) / 2; for (int i = 0; i < half_width; ++i) { - ASSERT_EQ(dest_u_c[i], dest_u_opt[i]) - << "u mismatch at " << i << " width " << width << " height " - << height; - ASSERT_EQ(dest_v_c[i], dest_v_opt[i]) - << "v mismatch at " << i << " width " << width << " height " - << height; + EXPECT_EQ(dest_u_c[i], dest_u_opt[i]) << "u mismatch at " << i << " width " << width << " height " << height; + EXPECT_EQ(dest_v_c[i], dest_v_opt[i]) << "v mismatch at " << i << " width " << width << " height " << height; } } } @@ -2867,7 +2853,6 @@ TEST_F(LibYUVConvertTest, TestARGBToUVMatrixRow_Opt) { (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)) // TODO(fbarchard): Consider _set_new_mode(0) to make malloc return NULL -#ifndef DISABLE_SLOW_TESTS TEST_F(LibYUVConvertTest, TestI400LargeSize) { // The width and height are chosen as follows: // - kWidth * kHeight is not a multiple of 8: This lets us to use the Any @@ -2911,18 +2896,18 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) { for (int i = 0; i < kWidth * kHeight; ++i) { orig_i400[i] = i % 256; } - ASSERT_EQ(I400ToARGBMatrix(orig_i400, kStride, dest_argb, kWidth, + EXPECT_EQ(I400ToARGBMatrix(orig_i400, kStride, dest_argb, kWidth, &kYuvJPEGConstants, kWidth, kHeight), 0); free_aligned_buffer_page_end(dest_argb); free_aligned_buffer_page_end(orig_i400); } -#endif // DISABLE_SLOW_TESTS #endif // !defined(DISABLE_SLOW_TESTS) && \ // (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)) #endif 
// !defined(LEAN_TESTS) + #define TESTATOBPI(FMT_A, TYPE_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, SUBSAMP_X, \ SUBSAMP_Y, W1280, N, NEG, OFF) \ TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \ @@ -2935,17 +2920,17 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) { const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \ const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \ align_buffer_page_end(src_argb, \ - kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \ - align_buffer_page_end(dst_y_c, kStrideY * kHeight); \ + kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \ + align_buffer_page_end(dst_y_c, kStrideY* kHeight); \ align_buffer_page_end(dst_uv_c, kSizeUV); \ - align_buffer_page_end(dst_y_opt, kStrideY * kHeight); \ + align_buffer_page_end(dst_y_opt, kStrideY* kHeight); \ align_buffer_page_end(dst_uv_opt, kSizeUV); \ for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \ src_argb[i + OFF] = (fastrand() & 0xff); \ } \ - memset(dst_y_c, 1, kStrideY * kHeight); \ + memset(dst_y_c, 1, kStrideY* kHeight); \ memset(dst_uv_c, 2, kSizeUV); \ - memset(dst_y_opt, 101, kStrideY * kHeight); \ + memset(dst_y_opt, 101, kStrideY* kHeight); \ memset(dst_uv_opt, 102, kSizeUV); \ MaskCpuFlags(disable_cpu_flags_); \ FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, dst_y_c, kStrideY, \ @@ -2956,10 +2941,10 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) { kStrideY, dst_uv_opt, kStrideUV, kWidth, NEG kHeight); \ } \ for (int i = 0; i < kStrideY * kHeight; ++i) { \ - ASSERT_EQ(dst_y_c[i], dst_y_opt[i]); \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ for (int i = 0; i < kSizeUV; ++i) { \ - ASSERT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ + EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb); \ free_aligned_buffer_page_end(dst_y_c); \ diff --git a/unit_test/convert_test.cc b/unit_test/convert_test.cc index a38e7fdf9..3d5ce3799 100644 --- a/unit_test/convert_test.cc +++ b/unit_test/convert_test.cc @@ -51,9 +51,9 @@ namespace libyuv { 
#define ABGRToABGR ARGBCopy // subsample amount uses a divide. -#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) -#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN)) +#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN)) // Planar test @@ -78,19 +78,17 @@ namespace libyuv { const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight * SRC_BPC + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ align_buffer_page_end(src_u, \ - kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ align_buffer_page_end(src_v, \ - kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \ - align_buffer_page_end(dst_u_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_v_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \ - align_buffer_page_end(dst_u_opt, \ - kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_v_opt, \ - kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ + align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ MemRandomize(src_v + OFF, 
kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ @@ -104,12 +102,12 @@ namespace libyuv { src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ } \ - memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \ - memset(dst_u_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_v_c, 3, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \ - memset(dst_u_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_v_opt, 103, kDstHalfWidth * kDstHalfHeight * DST_BPC); \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ + memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ src_y_p, kWidth, src_u_p, kSrcHalfWidth, src_v_p, kSrcHalfWidth, \ @@ -127,11 +125,11 @@ namespace libyuv { NEG kHeight); \ } \ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - ASSERT_EQ(dst_y_c[i], dst_y_opt[i]); \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \ - ASSERT_EQ(dst_u_c[i], dst_u_opt[i]); \ - ASSERT_EQ(dst_v_c[i], dst_v_opt[i]); \ + EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \ + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \ } \ free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_u_c); \ @@ -214,15 +212,15 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) const int kHeight = benchmark_height_; \ const int kSizeUV = \ SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_uv, \ - kSizeUV * 
((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ @@ -241,12 +239,12 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) (fastrand() & 0xff); \ } \ } \ - memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_u_c, 2, \ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_v_c, 3, \ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_u_opt, 102, \ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_v_opt, 103, \ @@ -267,18 +265,18 @@ TESTPLANARTOP(I412, uint16_t, 2, 1, 1, I444, uint8_t, 1, 1, 1, 12) } \ for (int i = 0; i < kHeight; ++i) { \ for (int j = 0; j < kWidth; ++j) { \ - ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - ASSERT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - 
ASSERT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ } \ } \ @@ -361,17 +359,17 @@ static int I400ToNV21(const uint8_t* src_y, const int kSrcHalfHeight = SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ const int kDstHalfWidth = SUBSAMPLE(kWidth, DST_SUBSAMP_X); \ const int kDstHalfHeight = SUBSAMPLE(kHeight, DST_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight * SRC_BPC + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight* SRC_BPC + OFF); \ align_buffer_page_end(src_u, \ - kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ align_buffer_page_end(src_v, \ - kSrcHalfWidth * kSrcHalfHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \ + kSrcHalfWidth* kSrcHalfHeight* SRC_BPC + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ align_buffer_page_end(dst_uv_c, \ - kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ align_buffer_page_end(dst_uv_opt, \ - kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \ + kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ MemRandomize(src_y + OFF, kWidth * kHeight * SRC_BPC); \ MemRandomize(src_u + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ MemRandomize(src_v + OFF, kSrcHalfWidth * kSrcHalfHeight * SRC_BPC); \ @@ -385,10 +383,10 @@ static int I400ToNV21(const uint8_t* src_y, src_u_p[i] = src_u_p[i] & ((1 << SRC_DEPTH) - 1); \ src_v_p[i] = src_v_p[i] & ((1 << SRC_DEPTH) - 1); \ } \ - memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \ - memset(dst_uv_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \ - memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \ - memset(dst_uv_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC * 2); \ + 
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ + memset(dst_uv_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC * 2); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR(src_y_p, kWidth, src_u_p, kSrcHalfWidth, \ src_v_p, kSrcHalfWidth, \ @@ -404,10 +402,10 @@ static int I400ToNV21(const uint8_t* src_y, NEG kHeight); \ } \ for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \ - ASSERT_EQ(dst_y_c[i], dst_y_opt[i]); \ + EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \ } \ for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC * 2; ++i) { \ - ASSERT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ + EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \ } \ free_aligned_buffer_page_end(dst_y_c); \ free_aligned_buffer_page_end(dst_uv_c); \ @@ -480,15 +478,14 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) (kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \ const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \ const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, \ - kPaddedWidth * kPaddedHeight * SRC_BPC + OFF); \ + align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \ align_buffer_page_end( \ src_uv, \ 2 * kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \ align_buffer_page_end(dst_uv_c, \ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \ align_buffer_page_end(dst_uv_opt, \ 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ SRC_T* src_y_p = reinterpret_cast(src_y + OFF); \ @@ -505,13 +502,13 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) src_uv_p[i] = \ 
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \ } \ - memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \ + memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \ memset(dst_uv_c, 2, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ - memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \ + memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \ memset(dst_uv_opt, 102, 2 * kDstHalfWidth * kDstHalfHeight * DST_BPC); \ MaskCpuFlags(disable_cpu_flags_); \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth * SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ DOY ? reinterpret_cast(dst_y_c) : NULL, kWidth, \ reinterpret_cast(dst_uv_c), 2 * kDstHalfWidth, kWidth, \ @@ -519,7 +516,7 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) MaskCpuFlags(benchmark_cpu_info_); \ for (int i = 0; i < benchmark_iterations_; ++i) { \ SRC_FMT_PLANAR##To##FMT_PLANAR( \ - src_y_p, kWidth * SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ + src_y_p, kWidth* SRC_BPC / (int)sizeof(SRC_T), src_uv_p, \ 2 * kSrcHalfWidth * SRC_BPC / (int)sizeof(SRC_T), \ DOY ? 
reinterpret_cast(dst_y_opt) : NULL, kWidth, \ reinterpret_cast(dst_uv_opt), 2 * kDstHalfWidth, kWidth, \ @@ -528,13 +525,13 @@ TESTPLANARTOBP(I212, uint16_t, 2, 2, 1, P212, uint16_t, 2, 2, 1, 12) if (DOY) { \ for (int i = 0; i < kHeight; ++i) { \ for (int j = 0; j < kWidth; ++j) { \ - ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ } \ } \ } \ for (int i = 0; i < kDstHalfHeight; ++i) { \ for (int j = 0; j < 2 * kDstHalfWidth; ++j) { \ - ASSERT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ + EXPECT_EQ(dst_uv_c[i * 2 * kDstHalfWidth + j], \ dst_uv_opt[i * 2 * kDstHalfWidth + j]); \ } \ } \ @@ -601,16 +598,16 @@ TESTBPTOBP(P010, uint16_t, 2, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_page_end(src_argb, kStride * kHeight + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(dst_uv_c, \ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_uv_opt, \ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kStride; ++j) \ @@ -627,12 +624,12 @@ TESTBPTOBP(P010, uint16_t, 2, 2, 2, NV12, uint8_t, 1, 2, 2, 8, 1, 1) } \ for (int i = 0; i < kHeight; ++i) { \ for (int j = 0; j < 
kWidth; ++j) { \ - ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ for (int j = 0; j < kStrideUV; ++j) { \ - ASSERT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ } \ } \ free_aligned_buffer_page_end(dst_y_c); \ @@ -694,20 +691,20 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) const int kHeight = ALIGNINT(benchmark_height_, YALIGN); \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ const int kStride = (kStrideUV * SUBSAMP_X * 8 * BPP_A + 7) / 8; \ - align_buffer_page_end(src_argb, kStride * kHeight + OFF); \ - align_buffer_page_end(dst_a_c, kWidth * kHeight); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_a_c, kWidth* kHeight); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(dst_uv_c, \ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_a_opt, kWidth * kHeight); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_a_opt, kWidth* kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_uv_opt, \ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_a_c, 1, kWidth * kHeight); \ - memset(dst_y_c, 2, kWidth * kHeight); \ + memset(dst_a_c, 1, kWidth* kHeight); \ + memset(dst_y_c, 2, kWidth* kHeight); \ memset(dst_uv_c, 3, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_a_opt, 101, kWidth * kHeight); \ - memset(dst_y_opt, 102, kWidth * kHeight); \ + memset(dst_a_opt, 101, kWidth* kHeight); \ + memset(dst_y_opt, 102, kWidth* kHeight); \ memset(dst_uv_opt, 103, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kStride; ++j) \ @@ 
-725,13 +722,13 @@ TESTATOPLANAR(YUY2, 2, 1, I422, 2, 1) } \ for (int i = 0; i < kHeight; ++i) { \ for (int j = 0; j < kWidth; ++j) { \ - ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ - ASSERT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_a_c[i * kWidth + j], dst_a_opt[i * kWidth + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; ++i) { \ for (int j = 0; j < kStrideUV; ++j) { \ - ASSERT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ + EXPECT_EQ(dst_uv_c[i * kStrideUV + j], dst_uv_opt[i * kStrideUV + j]); \ } \ } \ free_aligned_buffer_page_end(dst_a_c); \ @@ -768,19 +765,19 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) const int kHeight = benchmark_height_; \ const int kStride = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \ const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \ - align_buffer_page_end(src_argb, kStride * kHeight + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + align_buffer_page_end(src_argb, kStride* kHeight + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(dst_uv_c, \ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_uv_opt, \ kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ for (int i = 0; i < kHeight; ++i) \ for (int j = 0; j < kStride; ++j) \ src_argb[(i * kStride) + j + OFF] = (fastrand() & 0xff); \ - memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_uv_c, 2, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_uv_opt, 102, kStrideUV * 2 * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ MaskCpuFlags(disable_cpu_flags_); \ FMT_A##To##FMT_PLANAR(src_argb + OFF, kStride, dst_y_c, kWidth, 
dst_uv_c, \ @@ -792,12 +789,12 @@ TESTATOPLANARA(ARGB, 4, 1, I420Alpha, 2, 2) } \ for (int i = 0; i < kHeight; ++i) { \ for (int j = 0; j < kWidth; ++j) { \ - ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < kStrideUV * 2; ++j) { \ - ASSERT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ + EXPECT_EQ(dst_uv_c[i * kStrideUV * 2 + j], \ dst_uv_opt[i * kStrideUV * 2 + j]); \ } \ } \ @@ -847,11 +844,11 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) { // No SOI or EOI. Expect fail. memset(orig_pixels, 0, kSize); - ASSERT_FALSE(ValidateJpeg(orig_pixels, kSize)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); // Test special value that matches marker start. memset(orig_pixels, 0xff, kSize); - ASSERT_FALSE(ValidateJpeg(orig_pixels, kSize)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); // EOI, SOI. Expect pass. orig_pixels[0] = 0xff; @@ -860,7 +857,7 @@ TEST_F(LibYUVConvertTest, ValidateJpeg) { orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. for (int times = 0; times < benchmark_iterations_; ++times) { - ASSERT_TRUE(ValidateJpeg(orig_pixels, kSize)); + EXPECT_TRUE(ValidateJpeg(orig_pixels, kSize)); } free_aligned_buffer_page_end(orig_pixels); } @@ -878,7 +875,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) { // No SOI or EOI. Expect fail. memset(orig_pixels, 0, kBufSize); - ASSERT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kBufSize)); // EOI, SOI. Expect pass. orig_pixels[0] = 0xff; @@ -887,7 +884,7 @@ TEST_F(LibYUVConvertTest, ValidateJpegLarge) { orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. 
for (int times = 0; times < benchmark_iterations_; ++times) { - ASSERT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); + EXPECT_TRUE(ValidateJpeg(orig_pixels, kBufSize)); } free_aligned_buffer_page_end(orig_pixels); } @@ -902,24 +899,24 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { align_buffer_page_end(orig_pixels, kSize); // NULL pointer. Expect fail. - ASSERT_FALSE(ValidateJpeg(NULL, kSize)); + EXPECT_FALSE(ValidateJpeg(NULL, kSize)); // Negative size. Expect fail. - ASSERT_FALSE(ValidateJpeg(orig_pixels, -1)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, -1)); // Too large size. Expect fail. - ASSERT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, 0xfb000000ull)); // No SOI or EOI. Expect fail. memset(orig_pixels, 0, kSize); - ASSERT_FALSE(ValidateJpeg(orig_pixels, kSize)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); // SOI but no EOI. Expect fail. orig_pixels[0] = 0xff; orig_pixels[1] = 0xd8; // SOI. orig_pixels[2] = 0xff; for (int times = 0; times < benchmark_iterations_; ++times) { - ASSERT_FALSE(ValidateJpeg(orig_pixels, kSize)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); } // EOI but no SOI. Expect fail. @@ -927,7 +924,7 @@ TEST_F(LibYUVConvertTest, InvalidateJpeg) { orig_pixels[1] = 0; orig_pixels[kSize - kOff + 0] = 0xff; orig_pixels[kSize - kOff + 1] = 0xd9; // EOI. 
- ASSERT_FALSE(ValidateJpeg(orig_pixels, kSize)); + EXPECT_FALSE(ValidateJpeg(orig_pixels, kSize)); free_aligned_buffer_page_end(orig_pixels); } @@ -1251,7 +1248,7 @@ TEST_F(LibYUVConvertTest, TestMJPGSize) { int width = 0; int height = 0; int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); printf("test jpeg size %d x %d\n", width, height); } @@ -1260,7 +1257,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420) { int width = 0; int height = 0; int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1278,15 +1275,15 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420) { dst_v, half_width, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_u_hash = HashDjb2(dst_u, half_width * half_height, 5381); uint32_t dst_v_hash = HashDjb2(dst_v, half_width * half_height, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_u_hash, 2501859930u); - ASSERT_EQ(dst_v_hash, 2126459123u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_u_hash, 2501859930u); + EXPECT_EQ(dst_v_hash, 2126459123u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_u); @@ -1297,7 +1294,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { int width = 0; int height = 0; int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1316,7 +1313,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Convert to I420 align_buffer_page_end(dst2_y, width * height); @@ -1327,7 +1324,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { dst2_v, 
half_width, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Convert I420 to NV21 align_buffer_page_end(dst3_y, width * height); @@ -1337,11 +1334,11 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV21) { width, dst3_vu, half_width * 2, width, height); for (int i = 0; i < width * height; ++i) { - ASSERT_EQ(dst_y[i], dst3_y[i]); + EXPECT_EQ(dst_y[i], dst3_y[i]); } for (int i = 0; i < half_width * half_height * 2; ++i) { - ASSERT_EQ(dst_vu[i], dst3_vu[i]); - ASSERT_EQ(dst_vu[i], dst3_vu[i]); + EXPECT_EQ(dst_vu[i], dst3_vu[i]); + EXPECT_EQ(dst_vu[i], dst3_vu[i]); } free_aligned_buffer_page_end(dst3_y); @@ -1359,7 +1356,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { int width = 0; int height = 0; int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1378,7 +1375,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Convert to I420 align_buffer_page_end(dst2_y, width * height); @@ -1389,7 +1386,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { dst2_v, half_width, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Convert I420 to NV12 align_buffer_page_end(dst3_y, width * height); @@ -1399,11 +1396,11 @@ TEST_F(LibYUVConvertTest, TestMJPGToI420_NV12) { width, dst3_uv, half_width * 2, width, height); for (int i = 0; i < width * height; ++i) { - ASSERT_EQ(dst_y[i], dst3_y[i]); + EXPECT_EQ(dst_y[i], dst3_y[i]); } for (int i = 0; i < half_width * half_height * 2; ++i) { - ASSERT_EQ(dst_uv[i], dst3_uv[i]); - ASSERT_EQ(dst_uv[i], dst3_uv[i]); + EXPECT_EQ(dst_uv[i], dst3_uv[i]); + EXPECT_EQ(dst_uv[i], dst3_uv[i]); } free_aligned_buffer_page_end(dst3_y); @@ -1421,7 +1418,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { int width = 0; int height 
= 0; int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1438,13 +1435,13 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_420) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_uv_hash, 1069662856u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 1069662856u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1454,7 +1451,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { int width = 0; int height = 0; int ret = MJPGSize(kTest2Jpg, kTest2JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1471,7 +1468,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. Hashes are for VU so flip the plane. 
uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); @@ -1479,8 +1476,8 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_420) { SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_vu_hash, 1069662856u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 1069662856u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1492,7 +1489,7 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1509,13 +1506,13 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV21_422) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_uv_hash, 493520167u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 493520167u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1525,7 +1522,7 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1542,7 +1539,7 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. Hashes are for VU so flip the plane. 
uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); @@ -1550,8 +1547,8 @@ TEST_F(LibYUVConvertTest, DISABLED_TestMJPGToNV12_422) { SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_vu_hash, 493520167u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 493520167u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1562,7 +1559,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { int width = 0; int height = 0; int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1579,13 +1576,13 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_400) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 330644005u); - ASSERT_EQ(dst_uv_hash, 135214341u); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_uv_hash, 135214341u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1595,7 +1592,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { int width = 0; int height = 0; int ret = MJPGSize(kTest0Jpg, kTest0JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1612,7 +1609,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. Hashes are for VU so flip the plane. 
uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); @@ -1620,8 +1617,8 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_400) { SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 330644005u); - ASSERT_EQ(dst_vu_hash, 135214341u); + EXPECT_EQ(dst_y_hash, 330644005u); + EXPECT_EQ(dst_vu_hash, 135214341u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1632,7 +1629,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { int width = 0; int height = 0; int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1649,13 +1646,13 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV21_444) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); uint32_t dst_uv_hash = HashDjb2(dst_uv, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_uv_hash, 506143297u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_uv_hash, 506143297u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1665,7 +1662,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { int width = 0; int height = 0; int ret = MJPGSize(kTest1Jpg, kTest1JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int half_width = (width + 1) / 2; int half_height = (height + 1) / 2; @@ -1682,7 +1679,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { half_width * 2, width, height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. Hashes are for VU so flip the plane. 
uint32_t dst_y_hash = HashDjb2(dst_y, width * height, 5381); @@ -1690,8 +1687,8 @@ TEST_F(LibYUVConvertTest, TestMJPGToNV12_444) { SwapUVPlane(dst_uv, half_width * 2, dst_vu, half_width * 2, half_width, half_height); uint32_t dst_vu_hash = HashDjb2(dst_vu, half_width * half_height * 2, 5381); - ASSERT_EQ(dst_y_hash, 2682851208u); - ASSERT_EQ(dst_vu_hash, 506143297u); + EXPECT_EQ(dst_y_hash, 2682851208u); + EXPECT_EQ(dst_vu_hash, 506143297u); free_aligned_buffer_page_end(dst_y); free_aligned_buffer_page_end(dst_uv); @@ -1702,7 +1699,7 @@ TEST_F(LibYUVConvertTest, TestMJPGToARGB) { int width = 0; int height = 0; int ret = MJPGSize(kTest3Jpg, kTest3JpgLen, &width, &height); - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); int benchmark_iterations = benchmark_iterations_ * benchmark_width_ * benchmark_height_ / (width * height); @@ -1716,14 +1713,14 @@ TEST_F(LibYUVConvertTest, TestMJPGToARGB) { height, width, height); } // Expect sucesss - ASSERT_EQ(0, ret); + EXPECT_EQ(0, ret); // Test result matches known hash value. 
uint32_t dst_argb_hash = HashDjb2(dst_argb, width * height, 5381); #ifdef LIBYUV_UNLIMITED_DATA - ASSERT_EQ(dst_argb_hash, 3900633302u); + EXPECT_EQ(dst_argb_hash, 3900633302u); #else - ASSERT_EQ(dst_argb_hash, 2355976473u); + EXPECT_EQ(dst_argb_hash, 2355976473u); #endif free_aligned_buffer_page_end(dst_argb); @@ -1786,11 +1783,11 @@ static int ShowJPegInfo(const uint8_t* sample, size_t sample_size) { } TEST_F(LibYUVConvertTest, TestMJPGInfo) { - ASSERT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); - ASSERT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); - ASSERT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); - ASSERT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); - ASSERT_EQ(1, ShowJPegInfo(kTest4Jpg, + EXPECT_EQ(1, ShowJPegInfo(kTest0Jpg, kTest0JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest1Jpg, kTest1JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest2Jpg, kTest2JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest3Jpg, kTest3JpgLen)); + EXPECT_EQ(1, ShowJPegInfo(kTest4Jpg, kTest4JpgLen)); // Valid but unsupported. 
} #endif // HAVE_JPEG @@ -1851,18 +1848,18 @@ TEST_F(LibYUVConvertTest, NV12Crop) { for (int i = 0; i < kDestHeight; ++i) { for (int j = 0; j < kDestWidth; ++j) { - ASSERT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); + EXPECT_EQ(dst_y[i * kWidth + j], dst_y_2[i * kWidth + j]); } } for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - ASSERT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + EXPECT_EQ(dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], dst_u_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); } } for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - ASSERT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], + EXPECT_EQ(dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j], dst_v_2[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); } } @@ -1924,19 +1921,19 @@ TEST_F(LibYUVConvertTest, I420CropOddY) { for (int i = 0; i < kDestHeight; ++i) { for (int j = 0; j < kDestWidth; ++j) { - ASSERT_EQ(src_y[crop_y * kWidth + i * kWidth + j], + EXPECT_EQ(src_y[crop_y * kWidth + i * kWidth + j], dst_y[i * kDestWidth + j]); } } for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - ASSERT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], + EXPECT_EQ(src_u[(crop_y / 2 + i) * kStrideU + j], dst_u[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); } } for (int i = 0; i < SUBSAMPLE(kDestHeight, SUBSAMP_Y); ++i) { for (int j = 0; j < SUBSAMPLE(kDestWidth, SUBSAMP_X); ++j) { - ASSERT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], + EXPECT_EQ(src_v[(crop_y / 2 + i) * kStrideV + j], dst_v[i * SUBSAMPLE(kDestWidth, SUBSAMP_X) + j]); } } @@ -1953,17 +1950,17 @@ TEST_F(LibYUVConvertTest, I420CropOddY) { const int kHeight = benchmark_height_; \ \ align_buffer_page_end(orig_uyvy, 4 * SUBSAMPLE(kWidth, 2) * kHeight); \ - align_buffer_page_end(orig_y, kWidth * kHeight); \ + 
align_buffer_page_end(orig_y, kWidth* kHeight); \ align_buffer_page_end(orig_u, \ SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ align_buffer_page_end(orig_v, \ SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ \ - align_buffer_page_end(dst_y_orig, kWidth * kHeight); \ + align_buffer_page_end(dst_y_orig, kWidth* kHeight); \ align_buffer_page_end(dst_uv_orig, \ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ \ - align_buffer_page_end(dst_y, kWidth * kHeight); \ + align_buffer_page_end(dst_y, kWidth* kHeight); \ align_buffer_page_end(dst_uv, \ 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2)); \ \ @@ -1984,14 +1981,14 @@ TEST_F(LibYUVConvertTest, I420CropOddY) { } \ \ for (int i = 0; i < kWidth * kHeight; ++i) { \ - ASSERT_EQ(orig_y[i], dst_y[i]); \ + EXPECT_EQ(orig_y[i], dst_y[i]); \ } \ for (int i = 0; i < kWidth * kHeight; ++i) { \ - ASSERT_EQ(dst_y_orig[i], dst_y[i]); \ + EXPECT_EQ(dst_y_orig[i], dst_y[i]); \ } \ for (int i = 0; i < 2 * SUBSAMPLE(kWidth, 2) * SUBSAMPLE(kHeight, 2); \ ++i) { \ - ASSERT_EQ(dst_uv_orig[i], dst_uv[i]); \ + EXPECT_EQ(dst_uv_orig[i], dst_uv[i]); \ } \ \ free_aligned_buffer_page_end(orig_uyvy); \ @@ -2040,7 +2037,7 @@ TEST_F(LibYUVConvertTest, MM21ToYUY2) { } for (int i = 0; i < 4 * SUBSAMPLE(kWidth, 2) * kHeight; ++i) { - ASSERT_EQ(dst_yuyv[i], golden_yuyv[i]); + EXPECT_EQ(dst_yuyv[i], golden_yuyv[i]); } free_aligned_buffer_page_end(orig_y); @@ -2053,6 +2050,7 @@ TEST_F(LibYUVConvertTest, MM21ToYUY2) { } // Test RGB24 to J420 is exact +#if defined(LIBYUV_BIT_EXACT) TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { const int kSize = 256; align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 @@ -2072,13 +2070,15 @@ TEST_F(LibYUVConvertTest, TestRGB24ToJ420) { } uint32_t checksum = HashDjb2(dest_j420, kSize * 3 / 2 * 2, 5381); - ASSERT_EQ(223551344u, checksum); + EXPECT_EQ(223551344u, checksum); free_aligned_buffer_page_end(orig_rgb24); free_aligned_buffer_page_end(dest_j420); } +#endif // Test RGB24 to I420 is exact +#if 
defined(LIBYUV_BIT_EXACT) TEST_F(LibYUVConvertTest, TestRGB24ToI420) { const int kSize = 256; align_buffer_page_end(orig_rgb24, kSize * 3 * 2); // 2 rows of RGB24 @@ -2098,11 +2098,12 @@ TEST_F(LibYUVConvertTest, TestRGB24ToI420) { } uint32_t checksum = HashDjb2(dest_i420, kSize * 3 / 2 * 2, 5381); - ASSERT_EQ(4197774805u, checksum); + EXPECT_EQ(4197774805u, checksum); free_aligned_buffer_page_end(orig_rgb24); free_aligned_buffer_page_end(dest_i420); } +#endif TEST_F(LibYUVConvertTest, TestJ420ToI420) { const uint8_t src_y[12] = {0, 0, 128, 128, 255, 255, @@ -2115,15 +2116,15 @@ TEST_F(LibYUVConvertTest, TestJ420ToI420) { ASSERT_EQ(J420ToI420(src_y, 6, src_u, 3, src_v, 3, dst_y, 6, dst_u, 3, dst_v, 3, 6, 2), 0); - ASSERT_EQ(dst_y[0], 16); - ASSERT_EQ(dst_y[2], 126); - ASSERT_EQ(dst_y[4], 235); - ASSERT_EQ(dst_u[0], 16); - ASSERT_EQ(dst_u[1], 128); - ASSERT_EQ(dst_u[2], 240); - ASSERT_EQ(dst_v[0], 16); - ASSERT_EQ(dst_v[1], 128); - ASSERT_EQ(dst_v[2], 240); + EXPECT_EQ(dst_y[0], 16); + EXPECT_EQ(dst_y[2], 126); + EXPECT_EQ(dst_y[4], 235); + EXPECT_EQ(dst_u[0], 16); + EXPECT_EQ(dst_u[1], 128); + EXPECT_EQ(dst_u[2], 240); + EXPECT_EQ(dst_v[0], 16); + EXPECT_EQ(dst_v[1], 128); + EXPECT_EQ(dst_v[2], 240); } TEST_F(LibYUVConvertTest, TestABGRToI420Matrix) { @@ -2176,6 +2177,42 @@ TEST_F(LibYUVConvertTest, TestABGRToI420Matrix) { free_aligned_buffer_page_end(ref_v); } +TEST_F(LibYUVConvertTest, TestABGRToI422Matrix) { + const int kWidth = 16; + const int kHeight = 16; + align_buffer_page_end(src_abgr, kWidth * kHeight * 4); + align_buffer_page_end(dst_y, kWidth * kHeight); + align_buffer_page_end(dst_u, kWidth / 2 * kHeight); + align_buffer_page_end(dst_v, kWidth / 2 * kHeight); + + MemRandomize(src_abgr, kWidth * kHeight * 4); + + // JPEG + ARGBToI422Matrix(src_abgr, kWidth * 4, dst_y, kWidth, dst_u, kWidth / 2, + dst_v, kWidth / 2, &kAbgrJPEGConstants, kWidth, kHeight); + // Verify against non-matrix version + align_buffer_page_end(ref_y, kWidth * kHeight); + 
align_buffer_page_end(ref_u, kWidth / 2 * kHeight); + align_buffer_page_end(ref_v, kWidth / 2 * kHeight); + ABGRToJ422(src_abgr, kWidth * 4, ref_y, kWidth, ref_u, kWidth / 2, ref_v, + kWidth / 2, kWidth, kHeight); + for (int i = 0; i < kWidth * kHeight; ++i) { + ASSERT_EQ(dst_y[i], ref_y[i]); + } + for (int i = 0; i < kWidth / 2 * kHeight; ++i) { + ASSERT_EQ(dst_u[i], ref_u[i]); + ASSERT_EQ(dst_v[i], ref_v[i]); + } + + free_aligned_buffer_page_end(src_abgr); + free_aligned_buffer_page_end(dst_y); + free_aligned_buffer_page_end(dst_u); + free_aligned_buffer_page_end(dst_v); + free_aligned_buffer_page_end(ref_y); + free_aligned_buffer_page_end(ref_u); + free_aligned_buffer_page_end(ref_v); +} + TEST_F(LibYUVConvertTest, TestARGBToNV12Matrix) { const int kWidth = 16; const int kHeight = 16; @@ -2290,18 +2327,17 @@ TEST_F(LibYUVConvertTest, TestARGBToI420Matrix) { dst_v, kWidth / 2, &kArgbU2020Constants, kWidth, kHeight); // Reference BT.709 (limited range) - // Y = round(0.2126 * 219 / 255 * R + 0.7152 * 219 / 255 * G + 0.0722 * 219 / - // 255 * B + 16) Y = round(0.1826 * R + 0.6142 * G + 0.0620 * B + 16) 47 * 255 - // + 157 * 255 + 16 * 255 + 4224 = 11985 + 40035 + 4080 + 4224 = 60324 60324 / - // 256 = 235.64 -> 235. Correct. + // Y = round(0.2126 * 219 / 255 * R + 0.7152 * 219 / 255 * G + 0.0722 * 219 / 255 * B + 16) + // Y = round(0.1826 * R + 0.6142 * G + 0.0620 * B + 16) + // 47 * 255 + 157 * 255 + 16 * 255 + 4224 = 11985 + 40035 + 4080 + 4224 = 60324 + // 60324 / 256 = 235.64 -> 235. Correct. 
- for (int i = 0; i < kWidth * kHeight * 4; ++i) - src_argb[i] = 255; + for (int i = 0; i < kWidth * kHeight * 4; ++i) src_argb[i] = 255; ARGBToI420Matrix(src_argb, kWidth * 4, dst_y, kWidth, dst_u, kWidth / 2, dst_v, kWidth / 2, &kArgbH709Constants, kWidth, kHeight); - ASSERT_EQ(dst_y[0], 235); - ASSERT_EQ(dst_u[0], 128); - ASSERT_EQ(dst_v[0], 128); + EXPECT_EQ(dst_y[0], 235); + EXPECT_EQ(dst_u[0], 128); + EXPECT_EQ(dst_v[0], 128); for (int i = 0; i < kWidth * kHeight * 4; i += 4) { src_argb[i + 0] = 0; // B @@ -2312,11 +2348,11 @@ TEST_F(LibYUVConvertTest, TestARGBToI420Matrix) { ARGBToI420Matrix(src_argb, kWidth * 4, dst_y, kWidth, dst_u, kWidth / 2, dst_v, kWidth / 2, &kArgbH709Constants, kWidth, kHeight); // Y = 47 * 255 + 4224 = 11985 + 4224 = 16209. 16209 / 256 = 63.3 -> 63. - ASSERT_EQ(dst_y[0], 63); + EXPECT_EQ(dst_y[0], 63); // U = -26 * 255 + 32768 = -6630 + 32768 = 26138. 26138 / 256 = 102.1 -> 102. - ASSERT_EQ(dst_u[0], 102); + EXPECT_EQ(dst_u[0], 102); // V = 112 * 255 + 32768 = 28560 + 32768 = 61328. 61328 / 256 = 239.5 -> 239. 
- ASSERT_EQ(dst_v[0], 239); + EXPECT_EQ(dst_v[0], 239); free_aligned_buffer_page_end(src_argb); free_aligned_buffer_page_end(dst_y); @@ -2427,132 +2463,6 @@ TEST_F(LibYUVConvertTest, TestARGBToI444Matrix) { free_aligned_buffer_page_end(ref_v); } -template -static void TestRGBToI420(ConvertToYUV convert_to_yuv, - ConvertToARGB convert_to_argb, - int width, - int height, - int disable_cpu_flags, - int benchmark_cpu_info) { - align_buffer_page_end(src_rgb, width * height * 4); - align_buffer_page_end(dst_y, width * height); - align_buffer_page_end(dst_u, (width + 1) / 2 * (height + 1) / 2); - align_buffer_page_end(dst_v, (width + 1) / 2 * (height + 1) / 2); - - align_buffer_page_end(tmp_argb, width * height * 4); - align_buffer_page_end(ref_y, width * height); - align_buffer_page_end(ref_u, (width + 1) / 2 * (height + 1) / 2); - align_buffer_page_end(ref_v, (width + 1) / 2 * (height + 1) / 2); - - MemRandomize(src_rgb, width * height * 4); - - { - SCOPED_TRACE("C_Version"); - MaskCpuFlags(disable_cpu_flags); - - // Clear buffers - memset(dst_y, 0, width * height); - memset(dst_u, 0, (width + 1) / 2 * (height + 1) / 2); - memset(dst_v, 0, (width + 1) / 2 * (height + 1) / 2); - memset(ref_y, 0, width * height); - memset(ref_u, 0, (width + 1) / 2 * (height + 1) / 2); - memset(ref_v, 0, (width + 1) / 2 * (height + 1) / 2); - memset(tmp_argb, 0, width * height * 4); - - int r1 = - convert_to_yuv(src_rgb, width * 4, dst_y, width, dst_u, (width + 1) / 2, - dst_v, (width + 1) / 2, width, height); - ASSERT_EQ(r1, 0); - - int r2 = - convert_to_argb(src_rgb, width * 4, tmp_argb, width * 4, width, height); - ASSERT_EQ(r2, 0); - - int r3 = ARGBToI420(tmp_argb, width * 4, ref_y, width, ref_u, - (width + 1) / 2, ref_v, (width + 1) / 2, width, height); - ASSERT_EQ(r3, 0); - - for (int i = 0; i < width * height; ++i) { - ASSERT_EQ(dst_y[i], ref_y[i]); - } - for (int i = 0; i < (width + 1) / 2 * (height + 1) / 2; ++i) { - ASSERT_EQ(dst_u[i], ref_u[i]); - ASSERT_EQ(dst_v[i], ref_v[i]); 
- } - } - - { - SCOPED_TRACE("SIMD_Version"); - MaskCpuFlags(benchmark_cpu_info); - - // Clear buffers - memset(dst_y, 0, width * height); - memset(dst_u, 0, (width + 1) / 2 * (height + 1) / 2); - memset(dst_v, 0, (width + 1) / 2 * (height + 1) / 2); - memset(ref_y, 0, width * height); - memset(ref_u, 0, (width + 1) / 2 * (height + 1) / 2); - memset(ref_v, 0, (width + 1) / 2 * (height + 1) / 2); - memset(tmp_argb, 0, width * height * 4); - - int r1 = - convert_to_yuv(src_rgb, width * 4, dst_y, width, dst_u, (width + 1) / 2, - dst_v, (width + 1) / 2, width, height); - ASSERT_EQ(r1, 0); - - int r2 = - convert_to_argb(src_rgb, width * 4, tmp_argb, width * 4, width, height); - ASSERT_EQ(r2, 0); - - int r3 = ARGBToI420(tmp_argb, width * 4, ref_y, width, ref_u, - (width + 1) / 2, ref_v, (width + 1) / 2, width, height); - ASSERT_EQ(r3, 0); - - for (int i = 0; i < width * height; ++i) { - ASSERT_EQ(dst_y[i], ref_y[i]); - } - for (int i = 0; i < (width + 1) / 2 * (height + 1) / 2; ++i) { - ASSERT_EQ(dst_u[i], ref_u[i]); - ASSERT_EQ(dst_v[i], ref_v[i]); - } - } - - free_aligned_buffer_page_end(src_rgb); - free_aligned_buffer_page_end(dst_y); - free_aligned_buffer_page_end(dst_u); - free_aligned_buffer_page_end(dst_v); - free_aligned_buffer_page_end(tmp_argb); - free_aligned_buffer_page_end(ref_y); - free_aligned_buffer_page_end(ref_u); - free_aligned_buffer_page_end(ref_v); -} - -TEST_F(LibYUVConvertTest, BGRAToI420_Check) { - TestRGBToI420(BGRAToI420, BGRAToARGB, 16, 16, disable_cpu_flags_, - benchmark_cpu_info_); - TestRGBToI420(BGRAToI420, BGRAToARGB, 17, 17, disable_cpu_flags_, - benchmark_cpu_info_); - TestRGBToI420(BGRAToI420, BGRAToARGB, 1280, 720, disable_cpu_flags_, - benchmark_cpu_info_); -} - -TEST_F(LibYUVConvertTest, RGBAToI420_Check) { - TestRGBToI420(RGBAToI420, RGBAToARGB, 16, 16, disable_cpu_flags_, - benchmark_cpu_info_); - TestRGBToI420(RGBAToI420, RGBAToARGB, 17, 17, disable_cpu_flags_, - benchmark_cpu_info_); - TestRGBToI420(RGBAToI420, RGBAToARGB, 1280, 
720, disable_cpu_flags_, - benchmark_cpu_info_); -} - -TEST_F(LibYUVConvertTest, ABGRToI420_Check) { - TestRGBToI420(ABGRToI420, ABGRToARGB, 16, 16, disable_cpu_flags_, - benchmark_cpu_info_); - TestRGBToI420(ABGRToI420, ABGRToARGB, 17, 17, disable_cpu_flags_, - benchmark_cpu_info_); - TestRGBToI420(ABGRToI420, ABGRToARGB, 1280, 720, disable_cpu_flags_, - benchmark_cpu_info_); -} - #endif // !defined(LEAN_TESTS) } // namespace libyuv diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc index b24f3e250..a70666740 100644 --- a/unit_test/cpu_test.cc +++ b/unit_test/cpu_test.cc @@ -48,7 +48,7 @@ TEST_F(LibYUVBaseTest, TestCpuId) { printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n", reinterpret_cast(&cpu_info[0]), cpu_info[0], cpu_info[1], cpu_info[2]); - ASSERT_EQ(12u, strlen(reinterpret_cast(&cpu_info[0]))); + EXPECT_EQ(12u, strlen(reinterpret_cast(&cpu_info[0]))); // CPU Family and Model // 3:0 - Stepping @@ -189,6 +189,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); int has_amxint8 = TestCpuFlag(kCpuHasAMXINT8); + int has_avx512bmm = TestCpuFlag(kCpuHasAVX512BMM); printf("Has X86 0x%x\n", has_x86); printf("Has SSE2 0x%x\n", has_sse2); printf("Has SSSE3 0x%x\n", has_ssse3); @@ -211,6 +212,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) { printf("HAS AVXVNNI 0x%x\n", has_avxvnni); printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); printf("Has AMXINT8 0x%x\n", has_amxint8); + printf("Has AVX512BMM 0x%x\n", has_avx512bmm); } #endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || // defined(_M_X64) @@ -327,8 +329,8 @@ TEST_F(LibYUVBaseTest, DISABLED_TestLinuxArm) { if (FileExists("../../unit_test/testdata/arm_v7.txt")) { printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n"); - ASSERT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt")); - ASSERT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt")); + EXPECT_EQ(0, 
ArmCpuCaps("../../unit_test/testdata/arm_v7.txt")); + EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt")); } else { printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n"); } @@ -347,23 +349,23 @@ TEST_F(LibYUVBaseTest, DISABLED_TestLinuxArm) { #if defined(__linux__) && defined(__aarch64__) TEST_F(LibYUVBaseTest, TestLinuxAArch64) { // Values taken from a Cortex-A57 machine, only Neon available. - ASSERT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U)); + EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U)); // Values taken from a Google Pixel 7. int expected = kCpuHasNEON | kCpuHasNeonDotProd; - ASSERT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U)); + EXPECT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U)); // Values taken from a Google Pixel 8. expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | kCpuHasSVE | kCpuHasSVE2; - ASSERT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU)); + EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU)); // Values taken from a Neoverse N2 machine. - ASSERT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU)); + EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU)); // Check for SME feature detection. 
expected |= kCpuHasSME; - ASSERT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU)); + EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU)); // TODO: Check for SME2 feature detection from Apple M4 } @@ -373,10 +375,10 @@ TEST_F(LibYUVBaseTest, DISABLED_TestLinuxRVV) { if (FileExists("../../unit_test/testdata/riscv64.txt")) { printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n"); - ASSERT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt")); - ASSERT_EQ(kCpuHasRVV, + EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt")); + EXPECT_EQ(kCpuHasRVV, RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt")); - ASSERT_EQ(kCpuHasRVV | kCpuHasRVVZVFH, + EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH, RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt")); } else { printf( @@ -410,15 +412,15 @@ TEST_F(LibYUVBaseTest, MAYBE_TestSetCpuFlags) { // Test setting different CPU configurations. int cpu_flags = kCpuHasARM | kCpuHasNEON | kCpuInitialized; SetCpuFlags(cpu_flags); - ASSERT_EQ(cpu_flags, TestCpuFlag(-1)); + EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); cpu_flags = kCpuHasX86 | kCpuInitialized; SetCpuFlags(cpu_flags); - ASSERT_EQ(cpu_flags, TestCpuFlag(-1)); + EXPECT_EQ(cpu_flags, TestCpuFlag(-1)); // Test that setting 0 turns auto-init back on. SetCpuFlags(0); - ASSERT_EQ(original_cpu_flags, TestCpuFlag(-1)); + EXPECT_EQ(original_cpu_flags, TestCpuFlag(-1)); // Restore the CPU flag mask. 
MaskCpuFlags(benchmark_cpu_info_); diff --git a/unit_test/cpu_thread_test.cc b/unit_test/cpu_thread_test.cc index 572074d73..b6c0fa066 100644 --- a/unit_test/cpu_thread_test.cc +++ b/unit_test/cpu_thread_test.cc @@ -51,10 +51,10 @@ TEST(LibYUVCpuThreadTest, TestCpuFlagMultipleThreads) { ret = pthread_create(&thread2, nullptr, ThreadMain, &cpu_flags2); ASSERT_EQ(ret, 0); ret = pthread_join(thread1, nullptr); - ASSERT_EQ(ret, 0); + EXPECT_EQ(ret, 0); ret = pthread_join(thread2, nullptr); - ASSERT_EQ(ret, 0); - ASSERT_EQ(cpu_flags1, cpu_flags2); + EXPECT_EQ(ret, 0); + EXPECT_EQ(cpu_flags1, cpu_flags2); #else printf("pthread unavailable; Test skipped."); #endif // LIBYUV_HAVE_PTHREAD diff --git a/unit_test/math_test.cc b/unit_test/math_test.cc index 4767f8b46..a1544c122 100644 --- a/unit_test/math_test.cc +++ b/unit_test/math_test.cc @@ -30,44 +30,44 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) { int result_opt[1280]; int result_c[1280]; - ASSERT_EQ(0x10000, libyuv::FixedDiv(1, 1)); - ASSERT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(1, 1)); + EXPECT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1)); // TODO(fbarchard): Avoid the following that throw exceptions. 
- // ASSERT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1)); - // ASSERT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1)); + // EXPECT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1)); + // EXPECT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640)); - ASSERT_EQ(0x30000, libyuv::FixedDiv(640 * 3, 640)); - ASSERT_EQ(0x40000, libyuv::FixedDiv(640 * 4, 640)); - ASSERT_EQ(0x50000, libyuv::FixedDiv(640 * 5, 640)); - ASSERT_EQ(0x60000, libyuv::FixedDiv(640 * 6, 640)); - ASSERT_EQ(0x70000, libyuv::FixedDiv(640 * 7, 640)); - ASSERT_EQ(0x80000, libyuv::FixedDiv(640 * 8, 640)); - ASSERT_EQ(0xa0000, libyuv::FixedDiv(640 * 10, 640)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960)); - ASSERT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640)); - ASSERT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(200000, 100000)); - ASSERT_EQ(0x18000, libyuv::FixedDiv(150000, 100000)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(40000, 20000)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(-40000, -20000)); - ASSERT_EQ(-0x20000, libyuv::FixedDiv(40000, -20000)); - ASSERT_EQ(-0x20000, libyuv::FixedDiv(-40000, 20000)); - ASSERT_EQ(0x10000, libyuv::FixedDiv(4095, 4095)); - ASSERT_EQ(0x10000, libyuv::FixedDiv(4096, 4096)); - ASSERT_EQ(0x10000, libyuv::FixedDiv(4097, 4097)); - ASSERT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640)); + EXPECT_EQ(0x30000, libyuv::FixedDiv(640 * 3, 640)); + EXPECT_EQ(0x40000, libyuv::FixedDiv(640 * 4, 640)); + EXPECT_EQ(0x50000, libyuv::FixedDiv(640 * 5, 640)); + EXPECT_EQ(0x60000, libyuv::FixedDiv(640 * 6, 640)); + EXPECT_EQ(0x70000, libyuv::FixedDiv(640 * 7, 640)); + EXPECT_EQ(0x80000, libyuv::FixedDiv(640 * 8, 640)); + EXPECT_EQ(0xa0000, libyuv::FixedDiv(640 * 10, 640)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960)); + EXPECT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640)); + EXPECT_EQ(0x04000, 
libyuv::FixedDiv(640 / 4, 640)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(200000, 100000)); + EXPECT_EQ(0x18000, libyuv::FixedDiv(150000, 100000)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(40000, 20000)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(-40000, -20000)); + EXPECT_EQ(-0x20000, libyuv::FixedDiv(40000, -20000)); + EXPECT_EQ(-0x20000, libyuv::FixedDiv(-40000, 20000)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4095, 4095)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4096, 4096)); + EXPECT_EQ(0x10000, libyuv::FixedDiv(4097, 4097)); + EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); for (int i = 1; i < 4100; ++i) { - ASSERT_EQ(0x10000, libyuv::FixedDiv(i, i)); - ASSERT_EQ(0x20000, libyuv::FixedDiv(i * 2, i)); - ASSERT_EQ(0x30000, libyuv::FixedDiv(i * 3, i)); - ASSERT_EQ(0x40000, libyuv::FixedDiv(i * 4, i)); - ASSERT_EQ(0x08000, libyuv::FixedDiv(i, i * 2)); - ASSERT_NEAR(16384 * 65536 / i, libyuv::FixedDiv(16384, i), 1); + EXPECT_EQ(0x10000, libyuv::FixedDiv(i, i)); + EXPECT_EQ(0x20000, libyuv::FixedDiv(i * 2, i)); + EXPECT_EQ(0x30000, libyuv::FixedDiv(i * 3, i)); + EXPECT_EQ(0x40000, libyuv::FixedDiv(i * 4, i)); + EXPECT_EQ(0x08000, libyuv::FixedDiv(i, i * 2)); + EXPECT_NEAR(16384 * 65536 / i, libyuv::FixedDiv(16384, i), 1); } - ASSERT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); + EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1)); MemRandomize(reinterpret_cast(&num[0]), sizeof(num)); MemRandomize(reinterpret_cast(&div[0]), sizeof(div)); @@ -84,7 +84,7 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) { } for (int j = 0; j < 1280; ++j) { result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); - ASSERT_NEAR(result_c[j], result_opt[j], 1); + EXPECT_NEAR(result_c[j], result_opt[j], 1); } } @@ -118,7 +118,7 @@ TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) { } for (int j = 0; j < 1280; ++j) { result_c[j] = libyuv::FixedDiv_C(num[j], div[j]); - ASSERT_NEAR(result_c[j], result_opt[j], 1); + EXPECT_NEAR(result_c[j], result_opt[j], 1); } } @@ -152,7 +152,7 
@@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) { } for (int j = 0; j < 1280; ++j) { result_c[j] = libyuv::FixedDiv1_C(num[j], div[j]); - ASSERT_NEAR(result_c[j], result_opt[j], 1); + EXPECT_NEAR(result_c[j], result_opt[j], 1); } } #endif // ENABLE_ROW_TESTS diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc index 7eba494b7..2e26b4cf6 100644 --- a/unit_test/planar_test.cc +++ b/unit_test/planar_test.cc @@ -29,7 +29,11 @@ #include "libyuv/row.h" /* For ScaleSumSamples_Neon */ #endif +#if defined(LIBYUV_BIT_EXACT) #define EXPECTED_UNATTENUATE_DIFF 0 +#else +#define EXPECTED_UNATTENUATE_DIFF 2 +#endif namespace libyuv { @@ -64,48 +68,48 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { orig_pixels[4 * 4 + 3] = 255u; ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1); - ASSERT_EQ(255u, unatten_pixels[0 * 4 + 0]); - ASSERT_EQ(255u, unatten_pixels[0 * 4 + 1]); - ASSERT_EQ(254u, unatten_pixels[0 * 4 + 2]); - ASSERT_EQ(128u, unatten_pixels[0 * 4 + 3]); - ASSERT_EQ(0u, unatten_pixels[1 * 4 + 0]); - ASSERT_EQ(0u, unatten_pixels[1 * 4 + 1]); - ASSERT_EQ(0u, unatten_pixels[1 * 4 + 2]); - ASSERT_EQ(0u, unatten_pixels[1 * 4 + 3]); - ASSERT_EQ(32u, unatten_pixels[2 * 4 + 0]); - ASSERT_EQ(128u, unatten_pixels[2 * 4 + 1]); - ASSERT_EQ(255u, unatten_pixels[2 * 4 + 2]); - ASSERT_EQ(128u, unatten_pixels[2 * 4 + 3]); - ASSERT_EQ(16u, unatten_pixels[3 * 4 + 0]); - ASSERT_EQ(64u, unatten_pixels[3 * 4 + 1]); - ASSERT_EQ(192u, unatten_pixels[3 * 4 + 2]); - ASSERT_EQ(255u, unatten_pixels[3 * 4 + 3]); - ASSERT_EQ(255u, unatten_pixels[4 * 4 + 0]); - ASSERT_EQ(255u, unatten_pixels[4 * 4 + 1]); - ASSERT_EQ(255u, unatten_pixels[4 * 4 + 2]); - ASSERT_EQ(255u, unatten_pixels[4 * 4 + 3]); + EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]); + EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]); + EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[0 * 4 + 3]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 0]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]); + EXPECT_EQ(0u, 
unatten_pixels[1 * 4 + 2]); + EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]); + EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]); + EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]); + EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]); + EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]); + EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]); + EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]); + EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]); ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1); - ASSERT_EQ(100u, atten_pixels[0 * 4 + 0]); - ASSERT_EQ(65u, atten_pixels[0 * 4 + 1]); - ASSERT_EQ(64u, atten_pixels[0 * 4 + 2]); - ASSERT_EQ(128u, atten_pixels[0 * 4 + 3]); - ASSERT_EQ(0u, atten_pixels[1 * 4 + 0]); - ASSERT_EQ(0u, atten_pixels[1 * 4 + 1]); - ASSERT_EQ(0u, atten_pixels[1 * 4 + 2]); - ASSERT_EQ(0u, atten_pixels[1 * 4 + 3]); - ASSERT_EQ(8u, atten_pixels[2 * 4 + 0]); - ASSERT_EQ(32u, atten_pixels[2 * 4 + 1]); - ASSERT_EQ(96u, atten_pixels[2 * 4 + 2]); - ASSERT_EQ(128u, atten_pixels[2 * 4 + 3]); - ASSERT_EQ(16u, atten_pixels[3 * 4 + 0]); - ASSERT_EQ(64u, atten_pixels[3 * 4 + 1]); - ASSERT_EQ(192u, atten_pixels[3 * 4 + 2]); - ASSERT_EQ(255u, atten_pixels[3 * 4 + 3]); - ASSERT_EQ(255u, atten_pixels[4 * 4 + 0]); - ASSERT_EQ(255u, atten_pixels[4 * 4 + 1]); - ASSERT_EQ(255u, atten_pixels[4 * 4 + 2]); - ASSERT_EQ(255u, atten_pixels[4 * 4 + 3]); + EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]); + EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]); + EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]); + EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]); + EXPECT_EQ(0u, atten_pixels[1 * 4 + 3]); + EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]); + EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]); + EXPECT_EQ(96u, atten_pixels[2 * 
4 + 2]); + EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]); + EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]); + EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]); + EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]); + EXPECT_EQ(255u, atten_pixels[3 * 4 + 3]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]); + EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]); // test 255 for (int i = 0; i < 256; ++i) { @@ -116,10 +120,10 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { } ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1); for (int i = 0; i < 256; ++i) { - ASSERT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]); - ASSERT_EQ(0, atten_pixels[i * 4 + 1]); - ASSERT_EQ(0, atten_pixels[i * 4 + 2]); - ASSERT_EQ(255, atten_pixels[i * 4 + 3]); + EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]); + EXPECT_EQ(0, atten_pixels[i * 4 + 1]); + EXPECT_EQ(0, atten_pixels[i * 4 + 2]); + EXPECT_EQ(255, atten_pixels[i * 4 + 3]); } for (int i = 0; i < 1280; ++i) { @@ -134,24 +138,24 @@ TEST_F(LibYUVPlanarTest, TestAttenuate) { ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1); } for (int i = 0; i < 1280; ++i) { - ASSERT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1); - ASSERT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1); - ASSERT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1); - ASSERT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1); + EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1); } // Make sure transparent, 50% and opaque are fully accurate. 
- ASSERT_EQ(0, atten_pixels[0 * 4 + 0]); - ASSERT_EQ(0, atten_pixels[0 * 4 + 1]); - ASSERT_EQ(0, atten_pixels[0 * 4 + 2]); - ASSERT_EQ(0, atten_pixels[0 * 4 + 3]); - ASSERT_EQ(64, atten_pixels[128 * 4 + 0]); - ASSERT_EQ(32, atten_pixels[128 * 4 + 1]); - ASSERT_EQ(21, atten_pixels[128 * 4 + 2]); - ASSERT_EQ(128, atten_pixels[128 * 4 + 3]); - ASSERT_EQ(255, atten_pixels[255 * 4 + 0]); - ASSERT_EQ(127, atten_pixels[255 * 4 + 1]); - ASSERT_EQ(85, atten_pixels[255 * 4 + 2]); - ASSERT_EQ(255, atten_pixels[255 * 4 + 3]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 0]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 1]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 2]); + EXPECT_EQ(0, atten_pixels[0 * 4 + 3]); + EXPECT_EQ(64, atten_pixels[128 * 4 + 0]); + EXPECT_EQ(32, atten_pixels[128 * 4 + 1]); + EXPECT_EQ(21, atten_pixels[128 * 4 + 2]); + EXPECT_EQ(128, atten_pixels[128 * 4 + 3]); + EXPECT_EQ(255, atten_pixels[255 * 4 + 0]); + EXPECT_EQ(127, atten_pixels[255 * 4 + 1]); + EXPECT_EQ(85, atten_pixels[255 * 4 + 2]); + EXPECT_EQ(255, atten_pixels[255 * 4 + 3]); free_aligned_buffer_page_end(atten2_pixels); free_aligned_buffer_page_end(unatten_pixels); @@ -207,28 +211,28 @@ TEST_F(LibYUVPlanarTest, ARGBAttenuate_Any) { benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(max_diff, 0); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_EQ(max_diff, 0); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_EQ(max_diff, 0); + EXPECT_EQ(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) { int max_diff = TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - 
ASSERT_EQ(max_diff, 0); + EXPECT_EQ(max_diff, 0); } static int TestUnattenuateI(int width, @@ -280,28 +284,28 @@ TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Any) { int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(max_diff, 0); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_EQ(max_diff, 0); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_EQ(max_diff, 0); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) { int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(max_diff, 0); + EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF); } TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { @@ -322,10 +326,10 @@ TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) { for (int y = 0; y < 16; ++y) { for (int x = 0; x < 16; ++x) { - ASSERT_EQ((x + 1) * (y + 1), added_pixels[y][x][0]); - ASSERT_EQ((x + 1) * (y + 1) * 2, added_pixels[y][x][1]); - ASSERT_EQ((x + 1) * (y + 1) * 3, added_pixels[y][x][2]); - ASSERT_EQ((x + 1) * (y + 1) * 255, added_pixels[y][x][3]); + EXPECT_EQ((x + 1) * (y + 1), added_pixels[y][x][0]); + EXPECT_EQ((x + 1) * (y + 1) * 2, added_pixels[y][x][1]); + EXPECT_EQ((x + 1) * (y + 1) * 3, added_pixels[y][x][2]); + EXPECT_EQ((x + 1) * (y + 1) * 255, added_pixels[y][x][3]); } } } @@ -367,30 +371,30 @@ TEST_F(LibYUVPlanarTest, TestARGBGray) { orig_pixels[5][3] = 224u; // Do 16 to test asm version. 
ARGBGray(&orig_pixels[0][0], 0, 0, 0, 16, 1); - ASSERT_NEAR(29u, orig_pixels[0][0], 1); - ASSERT_NEAR(29u, orig_pixels[0][1], 1); - ASSERT_NEAR(29u, orig_pixels[0][2], 1); - ASSERT_EQ(128u, orig_pixels[0][3]); - ASSERT_EQ(149u, orig_pixels[1][0]); - ASSERT_EQ(149u, orig_pixels[1][1]); - ASSERT_EQ(149u, orig_pixels[1][2]); - ASSERT_EQ(0u, orig_pixels[1][3]); - ASSERT_NEAR(77u, orig_pixels[2][0], 1); - ASSERT_NEAR(77u, orig_pixels[2][1], 1); - ASSERT_NEAR(77u, orig_pixels[2][2], 1); - ASSERT_EQ(255u, orig_pixels[2][3]); - ASSERT_EQ(0u, orig_pixels[3][0]); - ASSERT_EQ(0u, orig_pixels[3][1]); - ASSERT_EQ(0u, orig_pixels[3][2]); - ASSERT_EQ(255u, orig_pixels[3][3]); - ASSERT_EQ(255u, orig_pixels[4][0]); - ASSERT_EQ(255u, orig_pixels[4][1]); - ASSERT_EQ(255u, orig_pixels[4][2]); - ASSERT_EQ(255u, orig_pixels[4][3]); - ASSERT_NEAR(97u, orig_pixels[5][0], 1); - ASSERT_NEAR(97u, orig_pixels[5][1], 1); - ASSERT_NEAR(97u, orig_pixels[5][2], 1); - ASSERT_EQ(224u, orig_pixels[5][3]); + EXPECT_NEAR(29u, orig_pixels[0][0], 1); + EXPECT_NEAR(29u, orig_pixels[0][1], 1); + EXPECT_NEAR(29u, orig_pixels[0][2], 1); + EXPECT_EQ(128u, orig_pixels[0][3]); + EXPECT_EQ(149u, orig_pixels[1][0]); + EXPECT_EQ(149u, orig_pixels[1][1]); + EXPECT_EQ(149u, orig_pixels[1][2]); + EXPECT_EQ(0u, orig_pixels[1][3]); + EXPECT_NEAR(77u, orig_pixels[2][0], 1); + EXPECT_NEAR(77u, orig_pixels[2][1], 1); + EXPECT_NEAR(77u, orig_pixels[2][2], 1); + EXPECT_EQ(255u, orig_pixels[2][3]); + EXPECT_EQ(0u, orig_pixels[3][0]); + EXPECT_EQ(0u, orig_pixels[3][1]); + EXPECT_EQ(0u, orig_pixels[3][2]); + EXPECT_EQ(255u, orig_pixels[3][3]); + EXPECT_EQ(255u, orig_pixels[4][0]); + EXPECT_EQ(255u, orig_pixels[4][1]); + EXPECT_EQ(255u, orig_pixels[4][2]); + EXPECT_EQ(255u, orig_pixels[4][3]); + EXPECT_NEAR(97u, orig_pixels[5][0], 1); + EXPECT_NEAR(97u, orig_pixels[5][1], 1); + EXPECT_NEAR(97u, orig_pixels[5][2], 1); + EXPECT_EQ(224u, orig_pixels[5][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] 
= i / 2; @@ -439,30 +443,30 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { orig_pixels[5][3] = 224u; // Do 16 to test asm version. ARGBGrayTo(&orig_pixels[0][0], 0, &gray_pixels[0][0], 0, 16, 1); - ASSERT_NEAR(30u, gray_pixels[0][0], 1); - ASSERT_NEAR(30u, gray_pixels[0][1], 1); - ASSERT_NEAR(30u, gray_pixels[0][2], 1); - ASSERT_NEAR(128u, gray_pixels[0][3], 1); - ASSERT_NEAR(149u, gray_pixels[1][0], 1); - ASSERT_NEAR(149u, gray_pixels[1][1], 1); - ASSERT_NEAR(149u, gray_pixels[1][2], 1); - ASSERT_NEAR(0u, gray_pixels[1][3], 1); - ASSERT_NEAR(76u, gray_pixels[2][0], 1); - ASSERT_NEAR(76u, gray_pixels[2][1], 1); - ASSERT_NEAR(76u, gray_pixels[2][2], 1); - ASSERT_NEAR(255u, gray_pixels[2][3], 1); - ASSERT_NEAR(0u, gray_pixels[3][0], 1); - ASSERT_NEAR(0u, gray_pixels[3][1], 1); - ASSERT_NEAR(0u, gray_pixels[3][2], 1); - ASSERT_NEAR(255u, gray_pixels[3][3], 1); - ASSERT_NEAR(255u, gray_pixels[4][0], 1); - ASSERT_NEAR(255u, gray_pixels[4][1], 1); - ASSERT_NEAR(255u, gray_pixels[4][2], 1); - ASSERT_NEAR(255u, gray_pixels[4][3], 1); - ASSERT_NEAR(96u, gray_pixels[5][0], 1); - ASSERT_NEAR(96u, gray_pixels[5][1], 1); - ASSERT_NEAR(96u, gray_pixels[5][2], 1); - ASSERT_NEAR(224u, gray_pixels[5][3], 1); + EXPECT_NEAR(30u, gray_pixels[0][0], 1); + EXPECT_NEAR(30u, gray_pixels[0][1], 1); + EXPECT_NEAR(30u, gray_pixels[0][2], 1); + EXPECT_NEAR(128u, gray_pixels[0][3], 1); + EXPECT_NEAR(149u, gray_pixels[1][0], 1); + EXPECT_NEAR(149u, gray_pixels[1][1], 1); + EXPECT_NEAR(149u, gray_pixels[1][2], 1); + EXPECT_NEAR(0u, gray_pixels[1][3], 1); + EXPECT_NEAR(76u, gray_pixels[2][0], 1); + EXPECT_NEAR(76u, gray_pixels[2][1], 1); + EXPECT_NEAR(76u, gray_pixels[2][2], 1); + EXPECT_NEAR(255u, gray_pixels[2][3], 1); + EXPECT_NEAR(0u, gray_pixels[3][0], 1); + EXPECT_NEAR(0u, gray_pixels[3][1], 1); + EXPECT_NEAR(0u, gray_pixels[3][2], 1); + EXPECT_NEAR(255u, gray_pixels[3][3], 1); + EXPECT_NEAR(255u, gray_pixels[4][0], 1); + EXPECT_NEAR(255u, gray_pixels[4][1], 1); + EXPECT_NEAR(255u, 
gray_pixels[4][2], 1); + EXPECT_NEAR(255u, gray_pixels[4][3], 1); + EXPECT_NEAR(96u, gray_pixels[5][0], 1); + EXPECT_NEAR(96u, gray_pixels[5][1], 1); + EXPECT_NEAR(96u, gray_pixels[5][2], 1); + EXPECT_NEAR(224u, gray_pixels[5][3], 1); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; orig_pixels[i][1] = i / 2; @@ -481,10 +485,10 @@ TEST_F(LibYUVPlanarTest, TestARGBGrayTo) { } ARGBGray(&orig_pixels[0][0], 0, 0, 0, 256, 1); for (int i = 0; i < 256; ++i) { - ASSERT_EQ(i, orig_pixels[i][0]); - ASSERT_EQ(i, orig_pixels[i][1]); - ASSERT_EQ(i, orig_pixels[i][2]); - ASSERT_EQ(i, orig_pixels[i][3]); + EXPECT_EQ(i, orig_pixels[i][0]); + EXPECT_EQ(i, orig_pixels[i][1]); + EXPECT_EQ(i, orig_pixels[i][2]); + EXPECT_EQ(i, orig_pixels[i][3]); } } @@ -524,30 +528,30 @@ TEST_F(LibYUVPlanarTest, TestARGBSepia) { orig_pixels[5][3] = 224u; // Do 16 to test asm version. ARGBSepia(&orig_pixels[0][0], 0, 0, 0, 16, 1); - ASSERT_EQ(33u, orig_pixels[0][0]); - ASSERT_EQ(43u, orig_pixels[0][1]); - ASSERT_EQ(47u, orig_pixels[0][2]); - ASSERT_EQ(128u, orig_pixels[0][3]); - ASSERT_EQ(135u, orig_pixels[1][0]); - ASSERT_EQ(175u, orig_pixels[1][1]); - ASSERT_EQ(195u, orig_pixels[1][2]); - ASSERT_EQ(0u, orig_pixels[1][3]); - ASSERT_EQ(69u, orig_pixels[2][0]); - ASSERT_EQ(89u, orig_pixels[2][1]); - ASSERT_EQ(99u, orig_pixels[2][2]); - ASSERT_EQ(255u, orig_pixels[2][3]); - ASSERT_EQ(0u, orig_pixels[3][0]); - ASSERT_EQ(0u, orig_pixels[3][1]); - ASSERT_EQ(0u, orig_pixels[3][2]); - ASSERT_EQ(255u, orig_pixels[3][3]); - ASSERT_EQ(239u, orig_pixels[4][0]); - ASSERT_EQ(255u, orig_pixels[4][1]); - ASSERT_EQ(255u, orig_pixels[4][2]); - ASSERT_EQ(255u, orig_pixels[4][3]); - ASSERT_EQ(88u, orig_pixels[5][0]); - ASSERT_EQ(114u, orig_pixels[5][1]); - ASSERT_EQ(127u, orig_pixels[5][2]); - ASSERT_EQ(224u, orig_pixels[5][3]); + EXPECT_EQ(33u, orig_pixels[0][0]); + EXPECT_EQ(43u, orig_pixels[0][1]); + EXPECT_EQ(47u, orig_pixels[0][2]); + EXPECT_EQ(128u, orig_pixels[0][3]); + EXPECT_EQ(135u, orig_pixels[1][0]); 
+ EXPECT_EQ(175u, orig_pixels[1][1]); + EXPECT_EQ(195u, orig_pixels[1][2]); + EXPECT_EQ(0u, orig_pixels[1][3]); + EXPECT_EQ(69u, orig_pixels[2][0]); + EXPECT_EQ(89u, orig_pixels[2][1]); + EXPECT_EQ(99u, orig_pixels[2][2]); + EXPECT_EQ(255u, orig_pixels[2][3]); + EXPECT_EQ(0u, orig_pixels[3][0]); + EXPECT_EQ(0u, orig_pixels[3][1]); + EXPECT_EQ(0u, orig_pixels[3][2]); + EXPECT_EQ(255u, orig_pixels[3][3]); + EXPECT_EQ(239u, orig_pixels[4][0]); + EXPECT_EQ(255u, orig_pixels[4][1]); + EXPECT_EQ(255u, orig_pixels[4][2]); + EXPECT_EQ(255u, orig_pixels[4][3]); + EXPECT_EQ(88u, orig_pixels[5][0]); + EXPECT_EQ(114u, orig_pixels[5][1]); + EXPECT_EQ(127u, orig_pixels[5][2]); + EXPECT_EQ(224u, orig_pixels[5][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -595,22 +599,22 @@ TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { // Do 16 to test asm version. ARGBColorMatrix(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &kRGBToSepia[0], 16, 1); - ASSERT_EQ(31u, dst_pixels_opt[0][0]); - ASSERT_EQ(43u, dst_pixels_opt[0][1]); - ASSERT_EQ(47u, dst_pixels_opt[0][2]); - ASSERT_EQ(128u, dst_pixels_opt[0][3]); - ASSERT_EQ(135u, dst_pixels_opt[1][0]); - ASSERT_EQ(175u, dst_pixels_opt[1][1]); - ASSERT_EQ(195u, dst_pixels_opt[1][2]); - ASSERT_EQ(0u, dst_pixels_opt[1][3]); - ASSERT_EQ(67u, dst_pixels_opt[2][0]); - ASSERT_EQ(87u, dst_pixels_opt[2][1]); - ASSERT_EQ(99u, dst_pixels_opt[2][2]); - ASSERT_EQ(255u, dst_pixels_opt[2][3]); - ASSERT_EQ(87u, dst_pixels_opt[3][0]); - ASSERT_EQ(112u, dst_pixels_opt[3][1]); - ASSERT_EQ(127u, dst_pixels_opt[3][2]); - ASSERT_EQ(224u, dst_pixels_opt[3][3]); + EXPECT_EQ(31u, dst_pixels_opt[0][0]); + EXPECT_EQ(43u, dst_pixels_opt[0][1]); + EXPECT_EQ(47u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(135u, dst_pixels_opt[1][0]); + EXPECT_EQ(175u, dst_pixels_opt[1][1]); + EXPECT_EQ(195u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(67u, dst_pixels_opt[2][0]); + EXPECT_EQ(87u, 
dst_pixels_opt[2][1]); + EXPECT_EQ(99u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(87u, dst_pixels_opt[3][0]); + EXPECT_EQ(112u, dst_pixels_opt[3][1]); + EXPECT_EQ(127u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -629,10 +633,10 @@ TEST_F(LibYUVPlanarTest, TestARGBColorMatrix) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); - ASSERT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); - ASSERT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); - ASSERT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); } } @@ -668,22 +672,22 @@ TEST_F(LibYUVPlanarTest, TestRGBColorMatrix) { orig_pixels[3][3] = 224u; // Do 16 to test asm version. RGBColorMatrix(&orig_pixels[0][0], 0, &kRGBToSepia[0], 0, 0, 16, 1); - ASSERT_EQ(31u, orig_pixels[0][0]); - ASSERT_EQ(43u, orig_pixels[0][1]); - ASSERT_EQ(47u, orig_pixels[0][2]); - ASSERT_EQ(128u, orig_pixels[0][3]); - ASSERT_EQ(135u, orig_pixels[1][0]); - ASSERT_EQ(175u, orig_pixels[1][1]); - ASSERT_EQ(195u, orig_pixels[1][2]); - ASSERT_EQ(0u, orig_pixels[1][3]); - ASSERT_EQ(67u, orig_pixels[2][0]); - ASSERT_EQ(87u, orig_pixels[2][1]); - ASSERT_EQ(99u, orig_pixels[2][2]); - ASSERT_EQ(255u, orig_pixels[2][3]); - ASSERT_EQ(87u, orig_pixels[3][0]); - ASSERT_EQ(112u, orig_pixels[3][1]); - ASSERT_EQ(127u, orig_pixels[3][2]); - ASSERT_EQ(224u, orig_pixels[3][3]); + EXPECT_EQ(31u, orig_pixels[0][0]); + EXPECT_EQ(43u, orig_pixels[0][1]); + EXPECT_EQ(47u, orig_pixels[0][2]); + EXPECT_EQ(128u, orig_pixels[0][3]); + EXPECT_EQ(135u, orig_pixels[1][0]); + EXPECT_EQ(175u, orig_pixels[1][1]); + EXPECT_EQ(195u, orig_pixels[1][2]); + EXPECT_EQ(0u, orig_pixels[1][3]); + EXPECT_EQ(67u, 
orig_pixels[2][0]); + EXPECT_EQ(87u, orig_pixels[2][1]); + EXPECT_EQ(99u, orig_pixels[2][2]); + EXPECT_EQ(255u, orig_pixels[2][3]); + EXPECT_EQ(87u, orig_pixels[3][0]); + EXPECT_EQ(112u, orig_pixels[3][1]); + EXPECT_EQ(127u, orig_pixels[3][2]); + EXPECT_EQ(224u, orig_pixels[3][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -723,22 +727,22 @@ TEST_F(LibYUVPlanarTest, TestARGBColorTable) { orig_pixels[3][3] = 3u; // Do 16 to test asm version. ARGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1); - ASSERT_EQ(1u, orig_pixels[0][0]); - ASSERT_EQ(2u, orig_pixels[0][1]); - ASSERT_EQ(3u, orig_pixels[0][2]); - ASSERT_EQ(4u, orig_pixels[0][3]); - ASSERT_EQ(5u, orig_pixels[1][0]); - ASSERT_EQ(6u, orig_pixels[1][1]); - ASSERT_EQ(7u, orig_pixels[1][2]); - ASSERT_EQ(8u, orig_pixels[1][3]); - ASSERT_EQ(9u, orig_pixels[2][0]); - ASSERT_EQ(10u, orig_pixels[2][1]); - ASSERT_EQ(11u, orig_pixels[2][2]); - ASSERT_EQ(12u, orig_pixels[2][3]); - ASSERT_EQ(1u, orig_pixels[3][0]); - ASSERT_EQ(6u, orig_pixels[3][1]); - ASSERT_EQ(11u, orig_pixels[3][2]); - ASSERT_EQ(16u, orig_pixels[3][3]); + EXPECT_EQ(1u, orig_pixels[0][0]); + EXPECT_EQ(2u, orig_pixels[0][1]); + EXPECT_EQ(3u, orig_pixels[0][2]); + EXPECT_EQ(4u, orig_pixels[0][3]); + EXPECT_EQ(5u, orig_pixels[1][0]); + EXPECT_EQ(6u, orig_pixels[1][1]); + EXPECT_EQ(7u, orig_pixels[1][2]); + EXPECT_EQ(8u, orig_pixels[1][3]); + EXPECT_EQ(9u, orig_pixels[2][0]); + EXPECT_EQ(10u, orig_pixels[2][1]); + EXPECT_EQ(11u, orig_pixels[2][2]); + EXPECT_EQ(12u, orig_pixels[2][3]); + EXPECT_EQ(1u, orig_pixels[3][0]); + EXPECT_EQ(6u, orig_pixels[3][1]); + EXPECT_EQ(11u, orig_pixels[3][2]); + EXPECT_EQ(16u, orig_pixels[3][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -779,22 +783,22 @@ TEST_F(LibYUVPlanarTest, TestRGBColorTable) { orig_pixels[3][3] = 3u; // Do 16 to test asm version. 
RGBColorTable(&orig_pixels[0][0], 0, &kARGBTable[0], 0, 0, 16, 1); - ASSERT_EQ(1u, orig_pixels[0][0]); - ASSERT_EQ(2u, orig_pixels[0][1]); - ASSERT_EQ(3u, orig_pixels[0][2]); - ASSERT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged. - ASSERT_EQ(5u, orig_pixels[1][0]); - ASSERT_EQ(6u, orig_pixels[1][1]); - ASSERT_EQ(7u, orig_pixels[1][2]); - ASSERT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged. - ASSERT_EQ(9u, orig_pixels[2][0]); - ASSERT_EQ(10u, orig_pixels[2][1]); - ASSERT_EQ(11u, orig_pixels[2][2]); - ASSERT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged. - ASSERT_EQ(1u, orig_pixels[3][0]); - ASSERT_EQ(6u, orig_pixels[3][1]); - ASSERT_EQ(11u, orig_pixels[3][2]); - ASSERT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged. + EXPECT_EQ(1u, orig_pixels[0][0]); + EXPECT_EQ(2u, orig_pixels[0][1]); + EXPECT_EQ(3u, orig_pixels[0][2]); + EXPECT_EQ(0u, orig_pixels[0][3]); // Alpha unchanged. + EXPECT_EQ(5u, orig_pixels[1][0]); + EXPECT_EQ(6u, orig_pixels[1][1]); + EXPECT_EQ(7u, orig_pixels[1][2]); + EXPECT_EQ(1u, orig_pixels[1][3]); // Alpha unchanged. + EXPECT_EQ(9u, orig_pixels[2][0]); + EXPECT_EQ(10u, orig_pixels[2][1]); + EXPECT_EQ(11u, orig_pixels[2][2]); + EXPECT_EQ(2u, orig_pixels[2][3]); // Alpha unchanged. + EXPECT_EQ(1u, orig_pixels[3][0]); + EXPECT_EQ(6u, orig_pixels[3][1]); + EXPECT_EQ(11u, orig_pixels[3][2]); + EXPECT_EQ(3u, orig_pixels[3][3]); // Alpha unchanged. 
for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -820,10 +824,10 @@ TEST_F(LibYUVPlanarTest, TestARGBQuantize) { 1280, 1); for (int i = 0; i < 1280; ++i) { - ASSERT_EQ((i / 8 * 8 + 8 / 2) & 255, orig_pixels[i][0]); - ASSERT_EQ((i / 2 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][1]); - ASSERT_EQ((i / 3 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][2]); - ASSERT_EQ(i & 255, orig_pixels[i][3]); + EXPECT_EQ((i / 8 * 8 + 8 / 2) & 255, orig_pixels[i][0]); + EXPECT_EQ((i / 2 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][1]); + EXPECT_EQ((i / 3 / 8 * 8 + 8 / 2) & 255, orig_pixels[i][2]); + EXPECT_EQ(i & 255, orig_pixels[i][3]); } for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ARGBQuantize(&orig_pixels[0][0], 0, (65536 + (8 / 2)) / 8, 8, 8 / 2, 0, 0, @@ -848,7 +852,7 @@ TEST_F(LibYUVPlanarTest, ARGBMirror_Opt) { benchmark_width_ * 4, benchmark_width_, benchmark_height_); } for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); free_aligned_buffer_page_end(dst_pixels_opt); @@ -871,7 +875,7 @@ TEST_F(LibYUVPlanarTest, MirrorPlane_Opt) { benchmark_width_, benchmark_height_); } for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); free_aligned_buffer_page_end(dst_pixels_opt); @@ -895,7 +899,7 @@ TEST_F(LibYUVPlanarTest, MirrorUVPlane_Opt) { benchmark_width_ * 2, benchmark_width_, benchmark_height_); } for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); free_aligned_buffer_page_end(dst_pixels_opt); @@ -925,34 +929,34 @@ TEST_F(LibYUVPlanarTest, TestShade) { orig_pixels[3][3] = 0u; // Do 8 pixels to allow opt version to be 
used. ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80ffffff); - ASSERT_EQ(10u, shade_pixels[0][0]); - ASSERT_EQ(20u, shade_pixels[0][1]); - ASSERT_EQ(40u, shade_pixels[0][2]); - ASSERT_EQ(40u, shade_pixels[0][3]); - ASSERT_EQ(0u, shade_pixels[1][0]); - ASSERT_EQ(0u, shade_pixels[1][1]); - ASSERT_EQ(0u, shade_pixels[1][2]); - ASSERT_EQ(128u, shade_pixels[1][3]); - ASSERT_EQ(0u, shade_pixels[2][0]); - ASSERT_EQ(0u, shade_pixels[2][1]); - ASSERT_EQ(0u, shade_pixels[2][2]); - ASSERT_EQ(0u, shade_pixels[2][3]); - ASSERT_EQ(0u, shade_pixels[3][0]); - ASSERT_EQ(0u, shade_pixels[3][1]); - ASSERT_EQ(0u, shade_pixels[3][2]); - ASSERT_EQ(0u, shade_pixels[3][3]); + EXPECT_EQ(10u, shade_pixels[0][0]); + EXPECT_EQ(20u, shade_pixels[0][1]); + EXPECT_EQ(40u, shade_pixels[0][2]); + EXPECT_EQ(40u, shade_pixels[0][3]); + EXPECT_EQ(0u, shade_pixels[1][0]); + EXPECT_EQ(0u, shade_pixels[1][1]); + EXPECT_EQ(0u, shade_pixels[1][2]); + EXPECT_EQ(128u, shade_pixels[1][3]); + EXPECT_EQ(0u, shade_pixels[2][0]); + EXPECT_EQ(0u, shade_pixels[2][1]); + EXPECT_EQ(0u, shade_pixels[2][2]); + EXPECT_EQ(0u, shade_pixels[2][3]); + EXPECT_EQ(0u, shade_pixels[3][0]); + EXPECT_EQ(0u, shade_pixels[3][1]); + EXPECT_EQ(0u, shade_pixels[3][2]); + EXPECT_EQ(0u, shade_pixels[3][3]); ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x80808080); - ASSERT_EQ(5u, shade_pixels[0][0]); - ASSERT_EQ(10u, shade_pixels[0][1]); - ASSERT_EQ(20u, shade_pixels[0][2]); - ASSERT_EQ(40u, shade_pixels[0][3]); + EXPECT_EQ(5u, shade_pixels[0][0]); + EXPECT_EQ(10u, shade_pixels[0][1]); + EXPECT_EQ(20u, shade_pixels[0][2]); + EXPECT_EQ(40u, shade_pixels[0][3]); ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 8, 1, 0x10204080); - ASSERT_EQ(5u, shade_pixels[0][0]); - ASSERT_EQ(5u, shade_pixels[0][1]); - ASSERT_EQ(5u, shade_pixels[0][2]); - ASSERT_EQ(5u, shade_pixels[0][3]); + EXPECT_EQ(5u, shade_pixels[0][0]); + EXPECT_EQ(5u, shade_pixels[0][1]); + EXPECT_EQ(5u, shade_pixels[0][2]); + 
EXPECT_EQ(5u, shade_pixels[0][3]); for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ARGBShade(&orig_pixels[0][0], 0, &shade_pixels[0][0], 0, 1280, 1, @@ -1003,37 +1007,37 @@ TEST_F(LibYUVPlanarTest, TestARGBInterpolate) { ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, &interpolate_pixels[0][0], 0, 4, 1, 128); - ASSERT_EQ(8u, interpolate_pixels[0][0]); - ASSERT_EQ(16u, interpolate_pixels[0][1]); - ASSERT_EQ(32u, interpolate_pixels[0][2]); - ASSERT_EQ(64u, interpolate_pixels[0][3]); - ASSERT_EQ(0u, interpolate_pixels[1][0]); - ASSERT_EQ(0u, interpolate_pixels[1][1]); - ASSERT_EQ(0u, interpolate_pixels[1][2]); - ASSERT_EQ(128u, interpolate_pixels[1][3]); - ASSERT_EQ(0u, interpolate_pixels[2][0]); - ASSERT_EQ(0u, interpolate_pixels[2][1]); - ASSERT_EQ(0u, interpolate_pixels[2][2]); - ASSERT_EQ(0u, interpolate_pixels[2][3]); - ASSERT_EQ(128u, interpolate_pixels[3][0]); - ASSERT_EQ(128u, interpolate_pixels[3][1]); - ASSERT_EQ(128u, interpolate_pixels[3][2]); - ASSERT_EQ(128u, interpolate_pixels[3][3]); + EXPECT_EQ(8u, interpolate_pixels[0][0]); + EXPECT_EQ(16u, interpolate_pixels[0][1]); + EXPECT_EQ(32u, interpolate_pixels[0][2]); + EXPECT_EQ(64u, interpolate_pixels[0][3]); + EXPECT_EQ(0u, interpolate_pixels[1][0]); + EXPECT_EQ(0u, interpolate_pixels[1][1]); + EXPECT_EQ(0u, interpolate_pixels[1][2]); + EXPECT_EQ(128u, interpolate_pixels[1][3]); + EXPECT_EQ(0u, interpolate_pixels[2][0]); + EXPECT_EQ(0u, interpolate_pixels[2][1]); + EXPECT_EQ(0u, interpolate_pixels[2][2]); + EXPECT_EQ(0u, interpolate_pixels[2][3]); + EXPECT_EQ(128u, interpolate_pixels[3][0]); + EXPECT_EQ(128u, interpolate_pixels[3][1]); + EXPECT_EQ(128u, interpolate_pixels[3][2]); + EXPECT_EQ(128u, interpolate_pixels[3][3]); ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, &interpolate_pixels[0][0], 0, 4, 1, 0); - ASSERT_EQ(16u, interpolate_pixels[0][0]); - ASSERT_EQ(32u, interpolate_pixels[0][1]); - ASSERT_EQ(64u, interpolate_pixels[0][2]); - ASSERT_EQ(128u, 
interpolate_pixels[0][3]); + EXPECT_EQ(16u, interpolate_pixels[0][0]); + EXPECT_EQ(32u, interpolate_pixels[0][1]); + EXPECT_EQ(64u, interpolate_pixels[0][2]); + EXPECT_EQ(128u, interpolate_pixels[0][3]); ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, &interpolate_pixels[0][0], 0, 4, 1, 192); - ASSERT_EQ(4u, interpolate_pixels[0][0]); - ASSERT_EQ(8u, interpolate_pixels[0][1]); - ASSERT_EQ(16u, interpolate_pixels[0][2]); - ASSERT_EQ(32u, interpolate_pixels[0][3]); + EXPECT_EQ(4u, interpolate_pixels[0][0]); + EXPECT_EQ(8u, interpolate_pixels[0][1]); + EXPECT_EQ(16u, interpolate_pixels[0][2]); + EXPECT_EQ(32u, interpolate_pixels[0][3]); for (int i = 0; i < benchmark_pixels_div1280_; ++i) { ARGBInterpolate(&orig_pixels_0[0][0], 0, &orig_pixels_1[0][0], 0, @@ -1084,37 +1088,37 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane) { InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, &interpolate_pixels[0], 0, 16, 1, 128); - ASSERT_EQ(8u, interpolate_pixels[0]); - ASSERT_EQ(16u, interpolate_pixels[1]); - ASSERT_EQ(32u, interpolate_pixels[2]); - ASSERT_EQ(64u, interpolate_pixels[3]); - ASSERT_EQ(0u, interpolate_pixels[4]); - ASSERT_EQ(0u, interpolate_pixels[5]); - ASSERT_EQ(0u, interpolate_pixels[6]); - ASSERT_EQ(128u, interpolate_pixels[7]); - ASSERT_EQ(0u, interpolate_pixels[8]); - ASSERT_EQ(0u, interpolate_pixels[9]); - ASSERT_EQ(0u, interpolate_pixels[10]); - ASSERT_EQ(0u, interpolate_pixels[11]); - ASSERT_EQ(128u, interpolate_pixels[12]); - ASSERT_EQ(128u, interpolate_pixels[13]); - ASSERT_EQ(128u, interpolate_pixels[14]); - ASSERT_EQ(128u, interpolate_pixels[15]); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, 
interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, &interpolate_pixels[0], 0, 16, 1, 0); - ASSERT_EQ(16u, interpolate_pixels[0]); - ASSERT_EQ(32u, interpolate_pixels[1]); - ASSERT_EQ(64u, interpolate_pixels[2]); - ASSERT_EQ(128u, interpolate_pixels[3]); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, &interpolate_pixels[0], 0, 16, 1, 192); - ASSERT_EQ(4u, interpolate_pixels[0]); - ASSERT_EQ(8u, interpolate_pixels[1]); - ASSERT_EQ(16u, interpolate_pixels[2]); - ASSERT_EQ(32u, interpolate_pixels[3]); + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); for (int i = 0; i < benchmark_pixels_div1280_; ++i) { InterpolatePlane(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, @@ -1165,37 +1169,37 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, &interpolate_pixels[0], 0, 16, 1, 128); - ASSERT_EQ(8u, interpolate_pixels[0]); - ASSERT_EQ(16u, interpolate_pixels[1]); - ASSERT_EQ(32u, interpolate_pixels[2]); - ASSERT_EQ(64u, interpolate_pixels[3]); - ASSERT_EQ(0u, interpolate_pixels[4]); - ASSERT_EQ(0u, interpolate_pixels[5]); - ASSERT_EQ(0u, interpolate_pixels[6]); - ASSERT_EQ(128u, interpolate_pixels[7]); - ASSERT_EQ(0u, interpolate_pixels[8]); - ASSERT_EQ(0u, interpolate_pixels[9]); - ASSERT_EQ(0u, interpolate_pixels[10]); - ASSERT_EQ(0u, interpolate_pixels[11]); - ASSERT_EQ(128u, interpolate_pixels[12]); - ASSERT_EQ(128u, interpolate_pixels[13]); 
- ASSERT_EQ(128u, interpolate_pixels[14]); - ASSERT_EQ(128u, interpolate_pixels[15]); + EXPECT_EQ(8u, interpolate_pixels[0]); + EXPECT_EQ(16u, interpolate_pixels[1]); + EXPECT_EQ(32u, interpolate_pixels[2]); + EXPECT_EQ(64u, interpolate_pixels[3]); + EXPECT_EQ(0u, interpolate_pixels[4]); + EXPECT_EQ(0u, interpolate_pixels[5]); + EXPECT_EQ(0u, interpolate_pixels[6]); + EXPECT_EQ(128u, interpolate_pixels[7]); + EXPECT_EQ(0u, interpolate_pixels[8]); + EXPECT_EQ(0u, interpolate_pixels[9]); + EXPECT_EQ(0u, interpolate_pixels[10]); + EXPECT_EQ(0u, interpolate_pixels[11]); + EXPECT_EQ(128u, interpolate_pixels[12]); + EXPECT_EQ(128u, interpolate_pixels[13]); + EXPECT_EQ(128u, interpolate_pixels[14]); + EXPECT_EQ(128u, interpolate_pixels[15]); InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, &interpolate_pixels[0], 0, 16, 1, 0); - ASSERT_EQ(16u, interpolate_pixels[0]); - ASSERT_EQ(32u, interpolate_pixels[1]); - ASSERT_EQ(64u, interpolate_pixels[2]); - ASSERT_EQ(128u, interpolate_pixels[3]); + EXPECT_EQ(16u, interpolate_pixels[0]); + EXPECT_EQ(32u, interpolate_pixels[1]); + EXPECT_EQ(64u, interpolate_pixels[2]); + EXPECT_EQ(128u, interpolate_pixels[3]); InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, &interpolate_pixels[0], 0, 16, 1, 192); - ASSERT_EQ(4u, interpolate_pixels[0]); - ASSERT_EQ(8u, interpolate_pixels[1]); - ASSERT_EQ(16u, interpolate_pixels[2]); - ASSERT_EQ(32u, interpolate_pixels[3]); + EXPECT_EQ(4u, interpolate_pixels[0]); + EXPECT_EQ(8u, interpolate_pixels[1]); + EXPECT_EQ(16u, interpolate_pixels[2]); + EXPECT_EQ(32u, interpolate_pixels[3]); for (int i = 0; i < benchmark_pixels_div1280_; ++i) { InterpolatePlane_16(&orig_pixels_0[0], 0, &orig_pixels_1[0], 0, @@ -1212,10 +1216,10 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { (kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \ const int kStrideB = \ (kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \ - align_buffer_page_end(src_argb_a, kStrideA * kHeight + OFF); 
\ - align_buffer_page_end(src_argb_b, kStrideA * kHeight + OFF); \ - align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \ - align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \ + align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \ + align_buffer_page_end(src_argb_b, kStrideA* kHeight + OFF); \ + align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \ + align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \ for (int i = 0; i < kStrideA * kHeight; ++i) { \ src_argb_a[i + OFF] = (fastrand() & 0xff); \ src_argb_b[i + OFF] = (fastrand() & 0xff); \ @@ -1229,7 +1233,7 @@ TEST_F(LibYUVPlanarTest, TestInterpolatePlane_16) { dst_argb_opt, kStrideB, kWidth, NEG kHeight, TERP); \ } \ for (int i = 0; i < kStrideB * kHeight; ++i) { \ - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \ } \ free_aligned_buffer_page_end(src_argb_a); \ free_aligned_buffer_page_end(src_argb_b); \ @@ -1306,35 +1310,35 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Any) { int max_diff = TestBlend(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Unaligned) { int max_diff = TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Invert) { int max_diff = TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Unattenuated) { int max_diff = TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { int max_diff = 
TestBlend(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } static void TestBlendPlane(int width, @@ -1366,14 +1370,14 @@ static void TestBlendPlane(int width, BlendPlane(src_argb_a + off, width, src_argb_b + off, width, src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1); for (int i = 0; i < width; ++i) { - ASSERT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); + EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); } // Test destination is maintained exactly if alpha is 0. memset(src_argb_alpha + off, 0, width); BlendPlane(src_argb_a + off, width, src_argb_b + off, width, src_argb_alpha + off, width, dst_argb_opt + off, width, width, 1); for (int i = 0; i < width; ++i) { - ASSERT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); + EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); } for (int i = 0; i < kStride * height; ++i) { src_argb_a[i + off] = (fastrand() & 0xff); @@ -1392,7 +1396,7 @@ static void TestBlendPlane(int width, invert * height); } for (int i = 0; i < kStride * height; ++i) { - ASSERT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); + EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); } free_aligned_buffer_page_end(src_argb_a); free_aligned_buffer_page_end(src_argb_b); @@ -1418,7 +1422,7 @@ TEST_F(LibYUVPlanarTest, BlendPlane_Invert) { disable_cpu_flags_, benchmark_cpu_info_, -1, 1); } -#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) static void TestI420Blend(int width, int height, @@ -1473,11 +1477,11 @@ static void TestI420Blend(int width, width, invert * height); } for (int i = 0; i < width * height; ++i) { - ASSERT_EQ(dst_y_c[i + off], dst_y_opt[i + off]); + EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]); } for (int i = 0; i < kSizeUV; ++i) { - ASSERT_EQ(dst_u_c[i + off], dst_u_opt[i + off]); - ASSERT_EQ(dst_v_c[i + off], dst_v_opt[i + off]); + 
EXPECT_EQ(dst_u_c[i + off], dst_u_opt[i + off]); + EXPECT_EQ(dst_v_c[i + off], dst_v_opt[i + off]); } free_aligned_buffer_page_end(src_y0); free_aligned_buffer_page_end(src_u0); @@ -1528,15 +1532,15 @@ TEST_F(LibYUVPlanarTest, TestAffine) { ARGBAffineRow_C(&orig_pixels_0[0][0], 0, &interpolate_pixels_C[0][0], uv_step, 1280); - ASSERT_EQ(0u, interpolate_pixels_C[0][0]); - ASSERT_EQ(96u, interpolate_pixels_C[128][0]); - ASSERT_EQ(191u, interpolate_pixels_C[255][3]); + EXPECT_EQ(0u, interpolate_pixels_C[0][0]); + EXPECT_EQ(96u, interpolate_pixels_C[128][0]); + EXPECT_EQ(191u, interpolate_pixels_C[255][3]); #if defined(HAS_ARGBAFFINEROW_SSE2) SIMD_ALIGNED(uint8_t interpolate_pixels_Opt[1280][4]); ARGBAffineRow_SSE2(&orig_pixels_0[0][0], 0, &interpolate_pixels_Opt[0][0], uv_step, 1280); - ASSERT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4)); + EXPECT_EQ(0, memcmp(interpolate_pixels_Opt, interpolate_pixels_C, 1280 * 4)); int has_sse2 = TestCpuFlag(kCpuHasSSE2); if (has_sse2) { @@ -1599,28 +1603,28 @@ TEST_F(LibYUVPlanarTest, CopyPlane_Any) { int max_diff = TestCopyPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, CopyPlane_Unaligned) { int max_diff = TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, CopyPlane_Invert) { int max_diff = TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, CopyPlane_Opt) { int max_diff = TestCopyPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } 
TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { @@ -1632,30 +1636,30 @@ TEST_F(LibYUVPlanarTest, TestCopyPlaneZero) { // Disable all optimizations. MaskCpuFlags(disable_cpu_flags_); CopyPlane(&src, 0, &dst, 0, 0, 0); - ASSERT_EQ(src, 42); - ASSERT_EQ(dst, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); CopyPlane(&src, 1, &dst, 1, 1, 0); - ASSERT_EQ(src, 42); - ASSERT_EQ(dst, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); CopyPlane(&src, 1, &dst, 1, 0, 1); - ASSERT_EQ(src, 42); - ASSERT_EQ(dst, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); // Enable optimizations. MaskCpuFlags(benchmark_cpu_info_); CopyPlane(&src, 0, &dst, 0, 0, 0); - ASSERT_EQ(src, 42); - ASSERT_EQ(dst, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); CopyPlane(&src, 1, &dst, 1, 1, 0); - ASSERT_EQ(src, 42); - ASSERT_EQ(dst, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); CopyPlane(&src, 1, &dst, 1, 0, 1); - ASSERT_EQ(src, 42); - ASSERT_EQ(dst, 0); + EXPECT_EQ(src, 42); + EXPECT_EQ(dst, 0); } TEST_F(LibYUVPlanarTest, TestDetilePlane) { @@ -1689,7 +1693,7 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane) { } for (i = 0; i < y_plane_size; ++i) { - ASSERT_EQ(dst_c[i], dst_opt[i]); + EXPECT_EQ(dst_c[i], dst_opt[i]); } free_aligned_buffer_page_end(tile_y); @@ -1728,7 +1732,7 @@ TEST_F(LibYUVPlanarTest, TestDetilePlane_16) { } for (i = 0; i < y_plane_size; ++i) { - ASSERT_EQ(dst_c[i], dst_opt[i]); + EXPECT_EQ(dst_c[i], dst_opt[i]); } free_aligned_buffer_page_end(tile_y); @@ -1774,8 +1778,8 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Correctness) { } for (i = 0; i < uv_plane_size; ++i) { - ASSERT_EQ(dst_u_two_stage[i], dst_u_opt[i]); - ASSERT_EQ(dst_v_two_stage[i], dst_v_opt[i]); + EXPECT_EQ(dst_u_two_stage[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_two_stage[i], dst_v_opt[i]); } free_aligned_buffer_page_end(tile_uv); @@ -1823,8 +1827,8 @@ TEST_F(LibYUVPlanarTest, TestDetileSplitUVPlane_Benchmark) { } for (i = 0; i < uv_plane_size; ++i) { - ASSERT_EQ(dst_u_c[i], dst_u_opt[i]); - ASSERT_EQ(dst_v_c[i], dst_v_opt[i]); 
+ EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); + EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); } free_aligned_buffer_page_end(tile_uv); @@ -1884,28 +1888,28 @@ TEST_F(LibYUVPlanarTest, ARGBMultiply_Any) { int max_diff = TestMultiply(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBMultiply_Unaligned) { int max_diff = TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBMultiply_Invert) { int max_diff = TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } TEST_F(LibYUVPlanarTest, ARGBMultiply_Opt) { int max_diff = TestMultiply(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 0); + EXPECT_LE(max_diff, 0); } static int TestAdd(int width, @@ -1958,28 +1962,28 @@ TEST_F(LibYUVPlanarTest, ARGBAdd_Any) { int max_diff = TestAdd(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBAdd_Unaligned) { int max_diff = TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBAdd_Invert) { int max_diff = TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBAdd_Opt) { int max_diff = TestAdd(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, 
benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } static int TestSubtract(int width, @@ -2032,28 +2036,28 @@ TEST_F(LibYUVPlanarTest, ARGBSubtract_Any) { int max_diff = TestSubtract(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBSubtract_Unaligned) { int max_diff = TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBSubtract_Invert) { int max_diff = TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, ARGBSubtract_Opt) { int max_diff = TestSubtract(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } static int TestSobel(int width, @@ -2104,28 +2108,28 @@ TEST_F(LibYUVPlanarTest, ARGBSobel_Any) { int max_diff = TestSobel(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobel_Unaligned) { int max_diff = TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobel_Invert) { int max_diff = TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobel_Opt) { int max_diff = TestSobel(benchmark_width_, benchmark_height_, benchmark_iterations_, 
disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } static int TestSobelToPlane(int width, @@ -2178,28 +2182,28 @@ TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Any) { int max_diff = TestSobelToPlane(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Unaligned) { int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Invert) { int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobelToPlane_Opt) { int max_diff = TestSobelToPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } static int TestSobelXY(int width, @@ -2250,28 +2254,28 @@ TEST_F(LibYUVPlanarTest, ARGBSobelXY_Any) { int max_diff = TestSobelXY(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobelXY_Unaligned) { int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobelXY_Invert) { int max_diff = TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBSobelXY_Opt) { int max_diff = 
TestSobelXY(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } static int TestBlur(int width, @@ -2334,28 +2338,28 @@ TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Any)) { int max_diff = TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Unaligned)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Invert)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlur_Opt)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } static const int kBlurSmallSize = 5; @@ -2363,28 +2367,28 @@ TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Any)) { int max_diff = TestBlur(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Unaligned)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, kBlurSmallSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Invert)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, 
disable_cpu_flags_, benchmark_cpu_info_, -1, 0, kBlurSmallSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(ARGBBlurSmall_Opt)) { int max_diff = TestBlur(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, kBlurSmallSize); - ASSERT_LE(max_diff, 1); + EXPECT_LE(max_diff, 1); } TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) { @@ -2428,26 +2432,26 @@ TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) { // Do 16 to test asm version. ARGBPolynomial(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &kWarmifyPolynomial[0], 16, 1); - ASSERT_EQ(235u, dst_pixels_opt[0][0]); - ASSERT_EQ(0u, dst_pixels_opt[0][1]); - ASSERT_EQ(0u, dst_pixels_opt[0][2]); - ASSERT_EQ(128u, dst_pixels_opt[0][3]); - ASSERT_EQ(0u, dst_pixels_opt[1][0]); - ASSERT_EQ(233u, dst_pixels_opt[1][1]); - ASSERT_EQ(0u, dst_pixels_opt[1][2]); - ASSERT_EQ(0u, dst_pixels_opt[1][3]); - ASSERT_EQ(0u, dst_pixels_opt[2][0]); - ASSERT_EQ(0u, dst_pixels_opt[2][1]); - ASSERT_EQ(241u, dst_pixels_opt[2][2]); - ASSERT_EQ(255u, dst_pixels_opt[2][3]); - ASSERT_EQ(235u, dst_pixels_opt[3][0]); - ASSERT_EQ(233u, dst_pixels_opt[3][1]); - ASSERT_EQ(241u, dst_pixels_opt[3][2]); - ASSERT_EQ(255u, dst_pixels_opt[3][3]); - ASSERT_EQ(10u, dst_pixels_opt[4][0]); - ASSERT_EQ(59u, dst_pixels_opt[4][1]); - ASSERT_EQ(188u, dst_pixels_opt[4][2]); - ASSERT_EQ(224u, dst_pixels_opt[4][3]); + EXPECT_EQ(235u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(233u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(241u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(235u, dst_pixels_opt[3][0]); + EXPECT_EQ(233u, 
dst_pixels_opt[3][1]); + EXPECT_EQ(241u, dst_pixels_opt[3][2]); + EXPECT_EQ(255u, dst_pixels_opt[3][3]); + EXPECT_EQ(10u, dst_pixels_opt[4][0]); + EXPECT_EQ(59u, dst_pixels_opt[4][1]); + EXPECT_EQ(188u, dst_pixels_opt[4][2]); + EXPECT_EQ(224u, dst_pixels_opt[4][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -2467,10 +2471,10 @@ TEST_F(LibYUVPlanarTest, DISABLED_ARM(TestARGBPolynomial)) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); - ASSERT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); - ASSERT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); - ASSERT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); } } @@ -2535,70 +2539,70 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) { int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f, 65535, +1, 0); - ASSERT_LE(diff, 1); + EXPECT_LE(diff, 1); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_Opt) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4095.0f, 4095, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, 
TestHalfFloatPlane_9bit_Opt) { int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 511.0f, 511, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Any) { int diff = TestHalfFloatPlane( benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Unaligned) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 2); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Invert) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, -1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { int diff = TestHalfFloatPlane( benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } #if defined(__arm__) && !defined(__SOFTFP__) @@ -2631,7 +2635,7 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_flush_denormal) { benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0); DisableFlushDenormalToZero(); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) { @@ -2640,7 +2644,7 @@ 
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) { benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0); DisableFlushDenormalToZero(); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } #endif // defined(__arm__) && !defined(__SOFTFP__) @@ -2687,7 +2691,7 @@ TEST_F(LibYUVPlanarTest, TestByteToFloat) { float diff = TestByteToFloat(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, 1.0f); - ASSERT_EQ(0.f, diff); + EXPECT_EQ(0.f, diff); } TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { @@ -2725,22 +2729,22 @@ TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { // Do 16 to test asm version. ARGBLumaColorTable(&orig_pixels[0][0], 0, &dst_pixels_opt[0][0], 0, &lumacolortable[0], 16, 1); - ASSERT_EQ(253u, dst_pixels_opt[0][0]); - ASSERT_EQ(0u, dst_pixels_opt[0][1]); - ASSERT_EQ(0u, dst_pixels_opt[0][2]); - ASSERT_EQ(128u, dst_pixels_opt[0][3]); - ASSERT_EQ(0u, dst_pixels_opt[1][0]); - ASSERT_EQ(253u, dst_pixels_opt[1][1]); - ASSERT_EQ(0u, dst_pixels_opt[1][2]); - ASSERT_EQ(0u, dst_pixels_opt[1][3]); - ASSERT_EQ(0u, dst_pixels_opt[2][0]); - ASSERT_EQ(0u, dst_pixels_opt[2][1]); - ASSERT_EQ(253u, dst_pixels_opt[2][2]); - ASSERT_EQ(255u, dst_pixels_opt[2][3]); - ASSERT_EQ(48u, dst_pixels_opt[3][0]); - ASSERT_EQ(192u, dst_pixels_opt[3][1]); - ASSERT_EQ(64u, dst_pixels_opt[3][2]); - ASSERT_EQ(224u, dst_pixels_opt[3][3]); + EXPECT_EQ(253u, dst_pixels_opt[0][0]); + EXPECT_EQ(0u, dst_pixels_opt[0][1]); + EXPECT_EQ(0u, dst_pixels_opt[0][2]); + EXPECT_EQ(128u, dst_pixels_opt[0][3]); + EXPECT_EQ(0u, dst_pixels_opt[1][0]); + EXPECT_EQ(253u, dst_pixels_opt[1][1]); + EXPECT_EQ(0u, dst_pixels_opt[1][2]); + EXPECT_EQ(0u, dst_pixels_opt[1][3]); + EXPECT_EQ(0u, dst_pixels_opt[2][0]); + EXPECT_EQ(0u, dst_pixels_opt[2][1]); + EXPECT_EQ(253u, dst_pixels_opt[2][2]); + EXPECT_EQ(255u, dst_pixels_opt[2][3]); + EXPECT_EQ(48u, dst_pixels_opt[3][0]); + 
EXPECT_EQ(192u, dst_pixels_opt[3][1]); + EXPECT_EQ(64u, dst_pixels_opt[3][2]); + EXPECT_EQ(224u, dst_pixels_opt[3][3]); for (int i = 0; i < 1280; ++i) { orig_pixels[i][0] = i; @@ -2759,10 +2763,10 @@ TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) { lumacolortable, 1280, 1); } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); - ASSERT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); - ASSERT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); - ASSERT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); + EXPECT_EQ(dst_pixels_c[i][0], dst_pixels_opt[i][0]); + EXPECT_EQ(dst_pixels_c[i][1], dst_pixels_opt[i][1]); + EXPECT_EQ(dst_pixels_c[i][2], dst_pixels_opt[i][2]); + EXPECT_EQ(dst_pixels_c[i][3], dst_pixels_opt[i][3]); } free_aligned_buffer_page_end(lumacolortable); @@ -2788,7 +2792,7 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyAlpha) { benchmark_width_ * 4, benchmark_width_, benchmark_height_); } for (int i = 0; i < kSize; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(dst_pixels_c); @@ -2827,7 +2831,7 @@ TEST_F(LibYUVPlanarTest, TestARGBExtractAlpha) { printf("%8d us C - %8d us OPT\n", static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(dst_pixels_c); @@ -2867,7 +2871,7 @@ TEST_F(LibYUVPlanarTest, TestARGBCopyYToAlpha) { printf("%8d us C - %8d us OPT\n", static_cast(c_time * 1e6), static_cast(opt_time * 1e6)); for (int i = 0; i < kPixels * 4; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(dst_pixels_c); @@ -2928,56 +2932,56 @@ TEST_F(LibYUVPlanarTest, ARGBRect_Any) { int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 
0, 4); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBRect_Unaligned) { int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 4); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBRect_Invert) { int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 4); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, ARGBRect_Opt) { int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 4); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, SetPlane_Any) { int max_diff = TestARGBRect(benchmark_width_ + 1, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, SetPlane_Unaligned) { int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 1, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, SetPlane_Invert) { int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, -1, 0, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, SetPlane_Opt) { int max_diff = TestARGBRect(benchmark_width_, benchmark_height_, benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_, +1, 0, 1); - ASSERT_EQ(0, max_diff); + EXPECT_EQ(0, max_diff); } TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { @@ -3005,7 +3009,7 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_Opt) { } for (int i = 0; i < kPixels * 2; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } 
free_aligned_buffer_page_end(src_pixels_u); @@ -3041,7 +3045,7 @@ TEST_F(LibYUVPlanarTest, MergeUVPlane_16_Opt) { } for (int i = 0; i < kPixels * 2 * 2; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels_u); free_aligned_buffer_page_end(src_pixels_v); @@ -3076,8 +3080,8 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_Opt) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); - ASSERT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); + EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); + EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3116,8 +3120,8 @@ TEST_F(LibYUVPlanarTest, SplitUVPlane_16_Opt) { } for (int i = 0; i < kPixels * 2; ++i) { - ASSERT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); - ASSERT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); + EXPECT_EQ(dst_pixels_u_c[i], dst_pixels_u_opt[i]); + EXPECT_EQ(dst_pixels_v_c[i], dst_pixels_v_opt[i]); } free_aligned_buffer_page_end(src_pixels); free_aligned_buffer_page_end(dst_pixels_u_c); @@ -3148,7 +3152,7 @@ TEST_F(LibYUVPlanarTest, SwapUVPlane_Opt) { } for (int i = 0; i < kPixels * 2; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3203,7 +3207,7 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) { } for (int i = 0; i < kPixels * 3; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3263,7 +3267,7 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) { benchmark_height_); for (int i = 0; i < kPixels * 3; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3327,7 +3331,7 @@ TEST_F(LibYUVPlanarTest, MergeARGBPlane_Opt) { } for (int i = 0; i < kPixels * 4; 
++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3393,7 +3397,7 @@ TEST_F(LibYUVPlanarTest, SplitARGBPlane_Opt) { benchmark_width_ * 4, benchmark_width_, benchmark_height_); for (int i = 0; i < kPixels * 4; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3455,7 +3459,7 @@ TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) { } for (int i = 0; i < kPixels * 4; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3515,7 +3519,7 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) { benchmark_height_); for (int i = 0; i < kPixels * 4; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); @@ -3563,7 +3567,7 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) { kWidth, NEG benchmark_height_, DEPTH); \ } \ for (int i = 0; i < kPixels * 4; ++i) { \ - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ } \ free_aligned_buffer_page_end(src_memory_r); \ free_aligned_buffer_page_end(src_memory_g); \ @@ -3604,7 +3608,7 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) { NEG benchmark_height_, DEPTH); \ } \ for (int i = 0; i < kPixels * 4; ++i) { \ - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ } \ free_aligned_buffer_page_end(src_memory_r); \ free_aligned_buffer_page_end(src_memory_g); \ @@ -3663,7 +3667,7 @@ TESTQPLANARTOP(MergeARGB16To8, uint16_t, uint8_t, 16) NEG benchmark_height_, DEPTH); \ } \ for (int i = 0; i < kPixels * 4; ++i) { \ - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \ } \ free_aligned_buffer_page_end(src_memory_r); \ 
free_aligned_buffer_page_end(src_memory_g); \ @@ -3719,7 +3723,7 @@ TEST_F(LibYUVPlanarTest, MergeUVRow_16_Opt) { } for (int i = 0; i < kPixels * 2 * 2; ++i) { - ASSERT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]); + EXPECT_EQ(dst_pixels_uv_opt[i], dst_pixels_uv_c[i]); } free_aligned_buffer_page_end(src_pixels_u); @@ -3760,7 +3764,7 @@ TEST_F(LibYUVPlanarTest, MultiplyRow_16_Opt) { } for (int i = 0; i < kPixels * 2; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3792,7 +3796,7 @@ TEST_F(LibYUVPlanarTest, Convert16To8Plane) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3823,7 +3827,7 @@ TEST_F(LibYUVPlanarTest, Convert8To8Plane) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3852,7 +3856,7 @@ TEST_F(LibYUVPlanarTest, YUY2ToY) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3881,7 +3885,7 @@ TEST_F(LibYUVPlanarTest, UYVYToY) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3927,7 +3931,7 @@ TEST_F(LibYUVPlanarTest, Convert16To8Row_Opt) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3955,7 +3959,7 @@ TEST_F(LibYUVPlanarTest, UYVYToYRow_Opt) { } for (int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], 
dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -3991,7 +3995,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Plane) { } for (int i = 0; i < kPixels * 2; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -4034,7 +4038,7 @@ TEST_F(LibYUVPlanarTest, Convert8To16Row_Opt) { } for (int i = 0; i < kPixels * 2; ++i) { - ASSERT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); + EXPECT_EQ(dst_pixels_y_opt[i], dst_pixels_y_c[i]); } free_aligned_buffer_page_end(src_pixels_y); @@ -4102,13 +4106,13 @@ float TestScaleMaxSamples(int benchmark_width, TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_C) { float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestScaleMaxSamples_Opt) { float diff = TestScaleMaxSamples(benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } float TestScaleSumSamples(int benchmark_width, @@ -4181,13 +4185,13 @@ float TestScaleSumSamples(int benchmark_width, TEST_F(LibYUVPlanarTest, TestScaleSumSamples_C) { float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestScaleSumSamples_Opt) { float diff = TestScaleSumSamples(benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } float TestScaleSamples(int benchmark_width, @@ -4245,13 +4249,13 @@ float TestScaleSamples(int benchmark_width, TEST_F(LibYUVPlanarTest, TestScaleSamples_C) { float diff = TestScaleSamples(benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, false); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, 
TestScaleSamples_Opt) { float diff = TestScaleSamples(benchmark_width_, benchmark_height_, benchmark_iterations_, 1.2f, true); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } float TestCopySamples(int benchmark_width, @@ -4305,13 +4309,13 @@ float TestCopySamples(int benchmark_width, TEST_F(LibYUVPlanarTest, TestCopySamples_C) { float diff = TestCopySamples(benchmark_width_, benchmark_height_, benchmark_iterations_, false); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } TEST_F(LibYUVPlanarTest, TestCopySamples_Opt) { float diff = TestCopySamples(benchmark_width_, benchmark_height_, benchmark_iterations_, true); - ASSERT_EQ(0, diff); + EXPECT_EQ(0, diff); } extern "C" void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width); @@ -4345,12 +4349,12 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } - ASSERT_EQ(dst_pixels_c[0], + EXPECT_EQ(dst_pixels_c[0], static_cast(0 * 1 + 1 * 4 + 2 * 6 + 3 * 4 + 4 * 1)); - ASSERT_EQ(dst_pixels_c[639], static_cast(10256)); + EXPECT_EQ(dst_pixels_c[639], static_cast(10256)); } extern "C" void GaussCol_NEON(const uint16_t* src0, @@ -4405,7 +4409,7 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } } @@ -4436,7 +4440,7 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_F32_Opt) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } } @@ -4476,7 +4480,7 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_F32_Opt) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(orig_pixels_buf); } @@ -4504,8 +4508,8 @@ TEST_F(LibYUVPlanarTest, SwapUVRow) { SwapUVRow(src_pixels_vu, dst_pixels_uv, kPixels); } for 
(int i = 0; i < kPixels; ++i) { - ASSERT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); - ASSERT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); + EXPECT_EQ(dst_pixels_uv[i * 2 + 0], src_pixels_vu[i * 2 + 1]); + EXPECT_EQ(dst_pixels_uv[i * 2 + 1], src_pixels_vu[i * 2 + 0]); } free_aligned_buffer_page_end(src_pixels_vu); @@ -4537,7 +4541,7 @@ TEST_F(LibYUVPlanarTest, TestGaussPlane_F32) { benchmark_height_); } for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { - ASSERT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f) + EXPECT_NEAR(((float*)(dst_pixels_c))[i], ((float*)(dst_pixels_opt))[i], 1.f) << i; } @@ -4572,7 +4576,7 @@ TEST_F(LibYUVPlanarTest, HalfMergeUVPlane_Opt) { } for (int i = 0; i < dst_width * 2 * dst_height; ++i) { - ASSERT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]); + EXPECT_EQ(dst_pixels_uv_c[i], dst_pixels_uv_opt[i]); } free_aligned_buffer_page_end(src_pixels_u); @@ -4601,10 +4605,10 @@ TEST_F(LibYUVPlanarTest, NV12Copy) { } for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { - ASSERT_EQ(src_y[i], dst_y[i]); + EXPECT_EQ(src_y[i], dst_y[i]); } for (int i = 0; i < halfwidth * 2 * halfheight; ++i) { - ASSERT_EQ(src_uv[i], dst_uv[i]); + EXPECT_EQ(src_uv[i], dst_uv[i]); } free_aligned_buffer_page_end(src_y); @@ -4633,10 +4637,10 @@ TEST_F(LibYUVPlanarTest, NV21Copy) { } for (int i = 0; i < benchmark_width_ * benchmark_height_; ++i) { - ASSERT_EQ(src_y[i], dst_y[i]); + EXPECT_EQ(src_y[i], dst_y[i]); } for (int i = 0; i < halfwidth * 2 * halfheight; ++i) { - ASSERT_EQ(src_vu[i], dst_vu[i]); + EXPECT_EQ(src_vu[i], dst_vu[i]); } free_aligned_buffer_page_end(src_y); @@ -4676,7 +4680,7 @@ TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) { y_plane_size); for (i = 0; i < y_plane_size; ++i) { - ASSERT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); + EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); } free_aligned_buffer_page_end(orig_f); @@ -4713,7 +4717,7 @@ 
TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) { y_plane_size); for (i = 0; i < y_plane_size; ++i) { - ASSERT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); + EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]); } free_aligned_buffer_page_end(orig_f); diff --git a/unit_test/rotate_argb_test.cc b/unit_test/rotate_argb_test.cc index 701e57a01..4c7b0b250 100644 --- a/unit_test/rotate_argb_test.cc +++ b/unit_test/rotate_argb_test.cc @@ -75,7 +75,7 @@ static void TestRotateBpp(int src_width, // Rotation should be exact. for (int i = 0; i < dst_argb_plane_size; ++i) { - ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); + EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); } free_aligned_buffer_page_end(dst_argb_c); @@ -189,35 +189,35 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) { align_buffer_page_end(src_argb, argb_plane_size); align_buffer_page_end(dst_argb, argb_plane_size); - ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, benchmark_width_ * 4, benchmark_width_, benchmark_height_, kRotate0)); - ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, benchmark_width_ * 4 - 1, benchmark_width_ - 1, benchmark_height_, kRotate0)); - ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, benchmark_width_ * 4, benchmark_width_, benchmark_height_, kRotate180)); - ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, benchmark_width_ * 4 - 1, benchmark_width_ - 1, benchmark_height_, kRotate180)); - ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, abs(benchmark_height_) * 4, benchmark_width_, benchmark_height_, kRotate90)); 
- ASSERT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, abs(benchmark_height_) * 4, benchmark_width_ - 1, benchmark_height_, kRotate90)); - ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, + EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb, abs(benchmark_height_) * 4, benchmark_width_, benchmark_height_, kRotate270)); - ASSERT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, + EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb, abs(benchmark_height_) * 4, benchmark_width_ - 1, benchmark_height_, kRotate270)); @@ -271,7 +271,7 @@ static void TestRotatePlane_16(int src_width, // Rotation should be exact. for (int i = 0; i < dst_plane_size; ++i) { - ASSERT_EQ(dst_c[i], dst_opt[i]); + EXPECT_EQ(dst_c[i], dst_opt[i]); } free_aligned_buffer_page_end_16(dst_c); diff --git a/unit_test/rotate_test.cc b/unit_test/rotate_test.cc index 10ee64cbc..abc08efa8 100644 --- a/unit_test/rotate_test.cc +++ b/unit_test/rotate_test.cc @@ -20,7 +20,7 @@ namespace libyuv { -#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) +#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a)) static void I420TestRotate(int src_width, int src_height, @@ -78,7 +78,7 @@ static void I420TestRotate(int src_width, // Rotation should be exact. for (int i = 0; i < dst_i420_size; ++i) { - ASSERT_EQ(dst_i420_c[i], dst_i420_opt[i]); + EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]); } free_aligned_buffer_page_end(dst_i420_c); @@ -197,7 +197,7 @@ static void I422TestRotate(int src_width, // Rotation should be exact. for (int i = 0; i < dst_i422_size; ++i) { - ASSERT_EQ(dst_i422_c[i], dst_i422_opt[i]); + EXPECT_EQ(dst_i422_c[i], dst_i422_opt[i]); } free_aligned_buffer_page_end(dst_i422_c); @@ -283,7 +283,7 @@ static void I444TestRotate(int src_width, // Rotation should be exact. 
for (int i = 0; i < dst_i444_size; ++i) { - ASSERT_EQ(dst_i444_c[i], dst_i444_opt[i]); + EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]); } free_aligned_buffer_page_end(dst_i444_c); @@ -401,7 +401,7 @@ static void NV12TestRotate(int src_width, // Rotation should be exact. for (int i = 0; i < dst_i420_size; ++i) { - ASSERT_EQ(dst_i420_c[i], dst_i420_opt[i]); + EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]); } free_aligned_buffer_page_end(dst_i420_c); @@ -495,15 +495,15 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) { const int kHeight = benchmark_height_; \ const int kSizeUV = \ SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \ - align_buffer_page_end(src_y, kWidth * kHeight + OFF); \ + align_buffer_page_end(src_y, kWidth* kHeight + OFF); \ align_buffer_page_end(src_uv, \ - kSizeUV * ((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ - align_buffer_page_end(dst_y_c, kWidth * kHeight); \ + kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \ + align_buffer_page_end(dst_y_c, kWidth* kHeight); \ align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - align_buffer_page_end(dst_y_opt, kWidth * kHeight); \ + align_buffer_page_end(dst_y_opt, kWidth* kHeight); \ align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ SUBSAMPLE(kHeight, SUBSAMP_Y)); \ align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \ @@ -522,12 +522,12 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) { (fastrand() & 0xff); \ } \ } \ - memset(dst_y_c, 1, kWidth * kHeight); \ + memset(dst_y_c, 1, kWidth* kHeight); \ memset(dst_u_c, 2, \ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_v_c, 3, \ SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \ - memset(dst_y_opt, 101, kWidth * kHeight); \ + memset(dst_y_opt, 101, kWidth* kHeight); \ memset(dst_u_opt, 102, \ SUBSAMPLE(kWidth, SUBSAMP_X) * 
SUBSAMPLE(kHeight, SUBSAMP_Y)); \ memset(dst_v_opt, 103, \ @@ -550,18 +550,18 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) { } \ for (int i = 0; i < kHeight; ++i) { \ for (int j = 0; j < kWidth; ++j) { \ - ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ + EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - ASSERT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ } \ } \ for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \ for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \ - ASSERT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ + EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \ dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \ } \ } \ @@ -656,7 +656,7 @@ static void I010TestRotate(int src_width, // Rotation should be exact. for (int i = 0; i < dst_i010_size; ++i) { - ASSERT_EQ(dst_i010_c[i], dst_i010_opt[i]); + EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]); } free_aligned_buffer_page_end_16(dst_i010_c); @@ -744,7 +744,7 @@ static void I210TestRotate(int src_width, // Rotation should be exact. for (int i = 0; i < dst_i210_size; ++i) { - ASSERT_EQ(dst_i210_c[i], dst_i210_opt[i]); + EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]); } free_aligned_buffer_page_end_16(dst_i210_c); @@ -830,7 +830,7 @@ static void I410TestRotate(int src_width, // Rotation should be exact. 
for (int i = 0; i < dst_i410_size; ++i) { - ASSERT_EQ(dst_i410_c[i], dst_i410_opt[i]); + EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]); } free_aligned_buffer_page_end_16(dst_i410_c); @@ -906,8 +906,8 @@ TEST_F(LibYUVRotateTest, Transpose4x4_Test) { for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - ASSERT_EQ(dst_pixels_c[i][j], src_pixels[j][i]); - ASSERT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]); + EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]); + EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]); } } } @@ -949,7 +949,7 @@ TEST_F(LibYUVRotateTest, Transpose4x4_Opt) { } for (int i = 0; i < width * height; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } free_aligned_buffer_page_end(src_pixels); diff --git a/unit_test/scale_argb_test.cc b/unit_test/scale_argb_test.cc index 3d3e36fc5..66fd4cf31 100644 --- a/unit_test/scale_argb_test.cc +++ b/unit_test/scale_argb_test.cc @@ -245,14 +245,14 @@ static int ARGBClipTestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##ARGBScaleDownClipBy##name##_##filter) { \ int diff = ARGBClipTestFilter( \ SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \ DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } // Test a scale factor with all 4 filters. 
Expect unfiltered to be exact, but @@ -294,28 +294,28 @@ TEST_FACTOR(3, 1, 3) int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width, \ height, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ int diff = ARGBTestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##name##ClipTo##width##x##height##_##filter) { \ int diff = \ ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \ kFilter##filter, benchmark_iterations_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##name##ClipFrom##width##x##height##_##filter) { \ int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #ifndef DISABLE_SLOW_TESTS @@ -357,7 +357,7 @@ TEST_SCALETO(ARGBScale, 1920, 1080) benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -430,14 +430,12 @@ static void FillRamp(uint8_t* buf, } // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact. 
-static void YUVToARGBTestFilter(int src_width, - int src_height, - int dst_width, - int dst_height, - FilterMode f, - int benchmark_iterations, - int error_threshold, - int* max_diff_out) { +static int YUVToARGBTestFilter(int src_width, + int src_height, + int dst_width, + int dst_height, + FilterMode f, + int benchmark_iterations) { int64_t src_y_plane_size = Abs(src_width) * Abs(src_height); int64_t src_uv_plane_size = ((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2); @@ -448,13 +446,13 @@ static void YUVToARGBTestFilter(int src_width, align_buffer_page_end(src_u, src_uv_plane_size); align_buffer_page_end(src_v, src_uv_plane_size); - int64_t dst_argb_plane_size = (dst_width) * (dst_height) * 4LL; - int dst_stride_argb = (dst_width) * 4; + int64_t dst_argb_plane_size = (dst_width) * (dst_height)*4LL; + int dst_stride_argb = (dst_width)*4; align_buffer_page_end(dst_argb_c, dst_argb_plane_size); align_buffer_page_end(dst_argb_opt, dst_argb_plane_size); if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) { printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n"); - return; + return 0; } // Fill YUV image with continuous ramp, which is less sensitive to // subsampling and filtering differences for test purposes. 
@@ -483,44 +481,36 @@ static void YUVToARGBTestFilter(int src_width, int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] - dst_argb_opt[(i * dst_stride_argb) + j]); if (abs_diff > max_diff) { - max_diff = abs_diff; - } - if (abs_diff > error_threshold) { - printf("error %d at %d,%d c %d opt %d\n", abs_diff, j, i, + printf("error %d at %d,%d c %d opt %d", abs_diff, j, i, dst_argb_c[(i * dst_stride_argb) + j], dst_argb_opt[(i * dst_stride_argb) + j]); - goto cleanup; + EXPECT_LE(abs_diff, 40); + max_diff = abs_diff; } } } -cleanup: - if (max_diff_out) { - *max_diff_out = max_diff; - } free_aligned_buffer_page_end(dst_argb_c); free_aligned_buffer_page_end(dst_argb_opt); free_aligned_buffer_page_end(src_y); free_aligned_buffer_page_end(src_u); free_aligned_buffer_page_end(src_v); + return max_diff; } TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) { - int diff = 0; - YUVToARGBTestFilter(benchmark_width_, benchmark_height_, - benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, - libyuv::kFilterBilinear, benchmark_iterations_, 10, - &diff); - ASSERT_LE(diff, 10); + int diff = + YUVToARGBTestFilter(benchmark_width_, benchmark_height_, + benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, + libyuv::kFilterBilinear, benchmark_iterations_); + EXPECT_LE(diff, 10); } TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) { - int diff = 0; - YUVToARGBTestFilter(benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, - benchmark_width_, benchmark_height_, - libyuv::kFilterBilinear, benchmark_iterations_, 10, - &diff); - ASSERT_LE(diff, 10); + int diff = YUVToARGBTestFilter( + benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_, + benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_); + EXPECT_LE(diff, 10); } TEST_F(LibYUVScaleTest, ARGBTest3x) { @@ -543,18 +533,18 @@ TEST_F(LibYUVScaleTest, ARGBTest3x) { kFilterBilinear); } - ASSERT_EQ(225, dest_pixels[0]); - ASSERT_EQ(255 - 225, dest_pixels[1]); - ASSERT_EQ(226, dest_pixels[2]); - ASSERT_EQ(235, dest_pixels[3]); 
+ EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(226, dest_pixels[2]); + EXPECT_EQ(235, dest_pixels[3]); ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, kFilterNone); - ASSERT_EQ(225, dest_pixels[0]); - ASSERT_EQ(255 - 225, dest_pixels[1]); - ASSERT_EQ(226, dest_pixels[2]); - ASSERT_EQ(235, dest_pixels[3]); + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(226, dest_pixels[2]); + EXPECT_EQ(235, dest_pixels[3]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); @@ -580,18 +570,18 @@ TEST_F(LibYUVScaleTest, ARGBTest4x) { kFilterBilinear); } - ASSERT_NEAR(66, dest_pixels[0], 4); - ASSERT_NEAR(255 - 66, dest_pixels[1], 4); - ASSERT_NEAR(67, dest_pixels[2], 4); - ASSERT_NEAR(76, dest_pixels[3], 4); + EXPECT_NEAR(66, dest_pixels[0], 4); + EXPECT_NEAR(255 - 66, dest_pixels[1], 4); + EXPECT_NEAR(67, dest_pixels[2], 4); + EXPECT_NEAR(76, dest_pixels[3], 4); ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, kFilterNone); - ASSERT_EQ(2, dest_pixels[0]); - ASSERT_EQ(255 - 2, dest_pixels[1]); - ASSERT_EQ(3, dest_pixels[2]); - ASSERT_EQ(12, dest_pixels[3]); + EXPECT_EQ(2, dest_pixels[0]); + EXPECT_EQ(255 - 2, dest_pixels[1]); + EXPECT_EQ(3, dest_pixels[2]); + EXPECT_EQ(12, dest_pixels[3]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); diff --git a/unit_test/scale_plane_test.cc b/unit_test/scale_plane_test.cc index b04dda10c..979c70aad 100644 --- a/unit_test/scale_plane_test.cc +++ b/unit_test/scale_plane_test.cc @@ -42,108 +42,6 @@ namespace libyuv { -// POC: int row_stride = src_stride * 2 overflows to a small negative value -// when src_stride is close to INT_MAX, causing src_ptr to walk backward -// past the start of the source allocation on the second loop iteration. 
-// With src_stride = 0x7FFFFFFE, row_stride = (int)0xFFFFFFFC = -4, so on -// y=1 ScaleRowDown2Box reads 4 bytes before the heap allocation. -TEST_F(LibYUVScaleTest, ScalePlaneDown2_RowStrideOverflow) { - constexpr int kSrcStride = 0x7FFFFFFE; // INT_MAX - 1 - constexpr int kSrcW = 64; - constexpr int kSrcH = 4; - constexpr int kDstW = 32; - constexpr int kDstH = 2; - // src_size = (kSrcH - 1) * stride + width. - size_t src_size = kSrcH - 1; - if (src_size > SIZE_MAX / kSrcStride) { - GTEST_SKIP() << "could not represent allocation size in size_t"; - } - src_size *= kSrcStride; - if (src_size > SIZE_MAX - kSrcW) { - GTEST_SKIP() << "could not represent allocation size in size_t"; - } - src_size += kSrcW; - -#if defined(__aarch64__) - // Infer malloc can accept a large size for cpu with dot product (a76/a55) - int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd); -#else - int has_large_malloc = 1; -#endif - if (!has_large_malloc) { - GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes"; - } - - uint8_t* src = new (std::nothrow) uint8_t[src_size]; - if (!src) { - GTEST_SKIP() << "could not allocate " << src_size << " bytes"; - } - uint8_t dst[kDstW * kDstH]; - uint8_t* src_row = src; - for (int i = 0; i < kSrcH; i++) { - memset(src_row, 0x41, kSrcW); - src_row += kSrcStride; - } - // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not - // instrument, so they silently read OOB without a report. - MaskCpuFlags(1); - // 2*dst == src on both axes -> ScalePlane dispatches to ScalePlaneDown2. - // int row_stride = kSrcStride * 2 wraps to -4; on y=1 src_ptr underflows. - ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH, - kFilterBox); - MaskCpuFlags(0); - delete[] src; -} - -// POC: same defect in the 1/4 fast path. src_stride = 0x3FFFFFFF gives -// int row_stride = src_stride * 4 = (int)0xFFFFFFFC = -4. 
-TEST_F(LibYUVScaleTest, ScalePlaneDown4_RowStrideOverflow) { - constexpr int kSrcStride = 0x3FFFFFFF; // INT_MAX / 4 (rounded down) - constexpr int kSrcW = 64; - constexpr int kSrcH = 8; - constexpr int kDstW = 16; - constexpr int kDstH = 2; - // src_size = (kSrcH - 1) * stride + width. - size_t src_size = kSrcH - 1; - if (src_size > SIZE_MAX / kSrcStride) { - GTEST_SKIP() << "could not represent allocation size in size_t"; - } - src_size *= kSrcStride; - if (src_size > SIZE_MAX - kSrcW) { - GTEST_SKIP() << "could not represent allocation size in size_t"; - } - src_size += kSrcW; - -#if defined(__aarch64__) - // Infer malloc can accept a large size for cpu with dot product (a76/a55) - int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd); -#else - int has_large_malloc = 1; -#endif - if (!has_large_malloc) { - GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes"; - } - - uint8_t* src = new (std::nothrow) uint8_t[src_size]; - if (!src) { - GTEST_SKIP() << "could not allocate " << src_size << " bytes"; - } - uint8_t dst[kDstW * kDstH]; - uint8_t* src_row = src; - for (int i = 0; i < kSrcH; i++) { - memset(src_row, 0x41, kSrcW); - src_row += kSrcStride; - } - // Force the C row kernel: the SIMD kernels are inline asm that ASAN does not - // instrument, so they silently read OOB without a report. - MaskCpuFlags(1); - // 4*dst == src on both axes with kFilterBox -> ScalePlaneDown4. - ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH, - kFilterBox); - MaskCpuFlags(0); - delete[] src; -} - #ifdef ENABLE_ROW_TESTS #ifdef HAS_SCALEROWDOWN2_SSSE3 TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { @@ -187,49 +85,49 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) { // Test regular half size. 
ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64); - ASSERT_EQ(64u, dst_pixels_c[0]); - ASSERT_EQ(25u, dst_pixels_c[1]); - ASSERT_EQ(13u, dst_pixels_c[2]); - ASSERT_EQ(5u, dst_pixels_c[3]); - ASSERT_EQ(0u, dst_pixels_c[4]); - ASSERT_EQ(133u, dst_pixels_c[63]); + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(133u, dst_pixels_c[63]); // Test Odd width version - Last pixel is just 1 horizontal pixel. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); - ASSERT_EQ(64u, dst_pixels_c[0]); - ASSERT_EQ(25u, dst_pixels_c[1]); - ASSERT_EQ(13u, dst_pixels_c[2]); - ASSERT_EQ(5u, dst_pixels_c[3]); - ASSERT_EQ(0u, dst_pixels_c[4]); - ASSERT_EQ(10u, dst_pixels_c[63]); + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(10u, dst_pixels_c[63]); // Test one pixel less, should skip the last pixel. memset(dst_pixels_c, 0, sizeof(dst_pixels_c)); ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63); - ASSERT_EQ(64u, dst_pixels_c[0]); - ASSERT_EQ(25u, dst_pixels_c[1]); - ASSERT_EQ(13u, dst_pixels_c[2]); - ASSERT_EQ(5u, dst_pixels_c[3]); - ASSERT_EQ(0u, dst_pixels_c[4]); - ASSERT_EQ(0u, dst_pixels_c[63]); + EXPECT_EQ(64u, dst_pixels_c[0]); + EXPECT_EQ(25u, dst_pixels_c[1]); + EXPECT_EQ(13u, dst_pixels_c[2]); + EXPECT_EQ(5u, dst_pixels_c[3]); + EXPECT_EQ(0u, dst_pixels_c[4]); + EXPECT_EQ(0u, dst_pixels_c[63]); // Test regular half size SSSE3. 
ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); - ASSERT_EQ(64u, dst_pixels_opt[0]); - ASSERT_EQ(25u, dst_pixels_opt[1]); - ASSERT_EQ(13u, dst_pixels_opt[2]); - ASSERT_EQ(5u, dst_pixels_opt[3]); - ASSERT_EQ(0u, dst_pixels_opt[4]); - ASSERT_EQ(133u, dst_pixels_opt[63]); + EXPECT_EQ(64u, dst_pixels_opt[0]); + EXPECT_EQ(25u, dst_pixels_opt[1]); + EXPECT_EQ(13u, dst_pixels_opt[2]); + EXPECT_EQ(5u, dst_pixels_opt[3]); + EXPECT_EQ(0u, dst_pixels_opt[4]); + EXPECT_EQ(133u, dst_pixels_opt[63]); // Compare C and SSSE3 match. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64); ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64); for (int i = 0; i < 64; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } } } @@ -262,11 +160,11 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) { } for (int i = 0; i < 1280; ++i) { - ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); + EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); } - ASSERT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); - ASSERT_EQ(dst_pixels_c[1279], 3839); + EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4); + EXPECT_EQ(dst_pixels_c[1279], 3839); } #endif // ENABLE_ROW_TESTS @@ -346,7 +244,7 @@ static int TestPlaneFilter_16(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } // Test a scale factor with all 4 filters. 
Expect unfiltered to be exact, but @@ -385,12 +283,12 @@ TEST_F(LibYUVScaleTest, PlaneTest3x) { kFilterBilinear); } - ASSERT_EQ(225, dest_pixels[0]); + EXPECT_EQ(225, dest_pixels[0]); ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, kFilterNone); - ASSERT_EQ(225, dest_pixels[0]); + EXPECT_EQ(225, dest_pixels[0]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); @@ -413,12 +311,12 @@ TEST_F(LibYUVScaleTest, PlaneTest4x) { kFilterBilinear); } - ASSERT_EQ(66, dest_pixels[0]); + EXPECT_EQ(66, dest_pixels[0]); ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1, kFilterNone); - ASSERT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); @@ -447,7 +345,7 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_None) { } for (int i = 0; i < kSize; ++i) { - ASSERT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); } free_aligned_buffer_page_end(dest_c_pixels); @@ -477,7 +375,7 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) { } for (int i = 0; i < kSize; ++i) { - ASSERT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); } free_aligned_buffer_page_end(dest_c_pixels); @@ -508,7 +406,7 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) { } for (int i = 0; i < kSize; ++i) { - ASSERT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); + EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]); } free_aligned_buffer_page_end(dest_c_pixels); @@ -534,9 +432,9 @@ TEST_F(LibYUVScaleTest, PlaneTest1_Box) { /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterBox); - ASSERT_EQ(dst_pixels[0], 1); - ASSERT_EQ(dst_pixels[1], 1); - ASSERT_EQ(dst_pixels[2], 3); + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); 
free_aligned_buffer_page_end(dst_pixels); free_aligned_buffer_page_end(orig_pixels); @@ -562,9 +460,9 @@ TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) { /* src_height= */ 1, dst_pixels, /* dst_stride= */ 1, /* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone); - ASSERT_EQ(dst_pixels[0], 1); - ASSERT_EQ(dst_pixels[1], 1); - ASSERT_EQ(dst_pixels[2], 3); + EXPECT_EQ(dst_pixels[0], 1); + EXPECT_EQ(dst_pixels[1], 1); + EXPECT_EQ(dst_pixels[2], 3); free_aligned_buffer_page_end(dst_pixels_alloc); free_aligned_buffer_page_end(orig_pixels_alloc); @@ -631,58 +529,9 @@ TEST_F(LibYUVScaleTest, ScalePlaneVertical_IntStrideOverflow) { kDstHeight, kFilterNone); // Not reached under ASAN. - ASSERT_EQ(0, r); + EXPECT_EQ(0, r); delete[] src; delete[] dst; } -TEST_F(LibYUVScaleTest, ScalePlane_InvalidInputs) { - uint8_t src[16] = {0}; - uint8_t dst[16] = {0}; - - // NULL src/dst - EXPECT_EQ(-1, ScalePlane(nullptr, 4, 4, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, nullptr, 4, 4, 4, kFilterNone)); - - // Width/height <= 0 (except src_height which can be negative but not 0) - EXPECT_EQ(-1, ScalePlane(src, 4, 0, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, -1, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 0, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, 0, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, -1, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, 4, 0, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, 4, -1, kFilterNone)); - - // Width/height too large (> 32768) - EXPECT_EQ(-1, ScalePlane(src, 4, 32769, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, 32769, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane(src, 4, 4, -32769, dst, 4, 4, 4, kFilterNone)); - - // Valid edge cases - EXPECT_EQ(0, ScalePlane(src, 4, 1, 1, dst, 4, 1, 1, kFilterNone)); - EXPECT_EQ(0, ScalePlane(src, 4, 1, -1, dst, 
4, 1, 1, kFilterNone)); -} - -TEST_F(LibYUVScaleTest, ScalePlane_16_InvalidInputs) { - uint16_t src[16] = {0}; - uint16_t dst[16] = {0}; - - EXPECT_EQ(-1, ScalePlane_16(nullptr, 4, 4, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_16(src, 4, 4, 4, nullptr, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_16(src, 4, 0, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_16(src, 4, 32769, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_16(src, 4, 4, -32769, dst, 4, 4, 4, kFilterNone)); -} - -TEST_F(LibYUVScaleTest, ScalePlane_12_InvalidInputs) { - uint16_t src[16] = {0}; - uint16_t dst[16] = {0}; - - EXPECT_EQ(-1, ScalePlane_12(nullptr, 4, 4, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_12(src, 4, 4, 4, nullptr, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_12(src, 4, 0, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_12(src, 4, 32769, 4, dst, 4, 4, 4, kFilterNone)); - EXPECT_EQ(-1, ScalePlane_12(src, 4, 4, -32769, dst, 4, 4, 4, kFilterNone)); -} - } // namespace libyuv diff --git a/unit_test/scale_rgb_test.cc b/unit_test/scale_rgb_test.cc index f6fa1e8ca..8296abe31 100644 --- a/unit_test/scale_rgb_test.cc +++ b/unit_test/scale_rgb_test.cc @@ -128,7 +128,7 @@ static int RGBTestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -163,14 +163,14 @@ TEST_FACTOR(3, 1, 3) int diff = RGBTestFilter(benchmark_width_, benchmark_height_, width, \ height, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ int diff = RGBTestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ 
benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -202,7 +202,7 @@ TEST_SCALETO(RGBScale, 1920, 1080) benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -233,14 +233,14 @@ TEST_F(LibYUVScaleTest, RGBTest3x) { kFilterBilinear); } - ASSERT_EQ(225, dest_pixels[0]); - ASSERT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, kFilterNone); - ASSERT_EQ(225, dest_pixels[0]); - ASSERT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); @@ -264,14 +264,14 @@ TEST_F(LibYUVScaleTest, RGBTest4x) { kFilterBilinear); } - ASSERT_EQ(66, dest_pixels[0]); - ASSERT_EQ(190, dest_pixels[1]); + EXPECT_EQ(66, dest_pixels[0]); + EXPECT_EQ(190, dest_pixels[1]); RGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1, kFilterNone); - ASSERT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row - ASSERT_EQ(255 - 2, dest_pixels[1]); + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + EXPECT_EQ(255 - 2, dest_pixels[1]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc index 323094f3f..299fd2381 100644 --- a/unit_test/scale_test.cc +++ b/unit_test/scale_test.cc @@ -757,7 +757,7 @@ static int NV12TestFilter(int src_width, int src_height_uv = (Abs(src_height) + 1) >> 1; int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height)); - int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv) * 2; + 
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2; int src_stride_y = Abs(src_width); int src_stride_uv = src_width_uv * 2; @@ -775,7 +775,7 @@ static int NV12TestFilter(int src_width, int dst_height_uv = (dst_height + 1) >> 1; int64_t dst_y_plane_size = (dst_width) * (dst_height); - int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv) * 2; + int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2; int dst_stride_y = dst_width; int dst_stride_uv = dst_width_uv * 2; @@ -856,7 +856,7 @@ static int NV12TestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \ int diff = I444TestFilter( \ @@ -864,7 +864,7 @@ static int NV12TestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \ int diff = I420TestFilter_12( \ @@ -872,7 +872,7 @@ static int NV12TestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \ int diff = I444TestFilter_12( \ @@ -880,7 +880,7 @@ static int NV12TestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, 
NV12ScaleDownBy##name##_##filter) { \ int diff = NV12TestFilter( \ @@ -888,7 +888,7 @@ static int NV12TestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but @@ -931,61 +931,61 @@ TEST_FACTOR(3, 1, 3, 0) int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \ height, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \ int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \ height, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I420##name##To##width##x##height##_##filter##_12) { \ int diff = I420TestFilter_12( \ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I444##name##To##width##x##height##_##filter##_12) { \ int diff = I444TestFilter_12( \ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \ int diff = I420TestFilter_16( \ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ 
TEST_F(LibYUVScaleTest, \ DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \ int diff = I444TestFilter_16( \ benchmark_width_, benchmark_height_, width, height, kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \ int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \ height, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \ int diff = I420TestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \ int diff = I444TestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I420##name##From##width##x##height##_##filter##_12) { \ @@ -993,7 +993,7 @@ TEST_FACTOR(3, 1, 3, 0) Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I444##name##From##width##x##height##_##filter##_12) { \ @@ -1001,7 +1001,7 @@ TEST_FACTOR(3, 1, 3, 0) Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \ @@ 
-1009,7 +1009,7 @@ TEST_FACTOR(3, 1, 3, 0) Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, \ DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \ @@ -1017,14 +1017,14 @@ TEST_FACTOR(3, 1, 3, 0) Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) { \ int diff = NV12TestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #ifndef DISABLE_SLOW_TESTS @@ -1068,49 +1068,49 @@ TEST_SCALETO(Scale, 1080, 1920) // for rotated phones benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \ int diff = I444TestFilter(benchmark_width_, benchmark_height_, \ benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) { \ int diff = I420TestFilter_12(benchmark_width_, benchmark_height_, \ benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) { \ int diff = I444TestFilter_12(benchmark_width_, benchmark_height_, \ benchmark_height_, benchmark_width_, \ kFilter##filter, 
benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \ int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \ benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \ int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \ benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \ int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \ benchmark_height_, benchmark_width_, \ kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } // Test scale to a specified size with all 4 filters. 
diff --git a/unit_test/scale_uv_test.cc b/unit_test/scale_uv_test.cc index df1e4c54c..dab217c97 100644 --- a/unit_test/scale_uv_test.cc +++ b/unit_test/scale_uv_test.cc @@ -101,7 +101,7 @@ static int UVTestFilter(int src_width, DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \ kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_EQ(0, diff); \ + EXPECT_EQ(0, diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -132,14 +132,14 @@ TEST_FACTOR(3, 1, 3) int diff = UVTestFilter(benchmark_width_, benchmark_height_, width, \ height, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } \ TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \ int diff = UVTestFilter(width, height, Abs(benchmark_width_), \ Abs(benchmark_height_), kFilter##filter, \ benchmark_iterations_, disable_cpu_flags_, \ benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -171,7 +171,7 @@ TEST_SCALETO(UVScale, 1920, 1080) UVTestFilter(benchmark_width_, benchmark_height_, benchmark_height_, \ benchmark_width_, kFilter##filter, benchmark_iterations_, \ disable_cpu_flags_, benchmark_cpu_info_); \ - ASSERT_LE(diff, max_diff); \ + EXPECT_LE(diff, max_diff); \ } #if defined(ENABLE_FULL_TESTS) @@ -202,14 +202,14 @@ TEST_F(LibYUVScaleTest, UVTest3x) { kFilterBilinear); } - ASSERT_EQ(225, dest_pixels[0]); - ASSERT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1, kFilterNone); - ASSERT_EQ(225, dest_pixels[0]); - ASSERT_EQ(255 - 225, dest_pixels[1]); + EXPECT_EQ(225, dest_pixels[0]); + EXPECT_EQ(255 - 225, dest_pixels[1]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); @@ -233,14 +233,14 @@ TEST_F(LibYUVScaleTest, 
UVTest4x) { kFilterBilinear); } - ASSERT_EQ(66, dest_pixels[0]); - ASSERT_EQ(190, dest_pixels[1]); + EXPECT_EQ(66, dest_pixels[0]); + EXPECT_EQ(190, dest_pixels[1]); UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1, kFilterNone); - ASSERT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row - ASSERT_EQ(255 - 2, dest_pixels[1]); + EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row + EXPECT_EQ(255 - 2, dest_pixels[1]); free_aligned_buffer_page_end(dest_pixels); free_aligned_buffer_page_end(orig_pixels); diff --git a/unit_test/unit_test.cc b/unit_test/unit_test.cc index 163e3ffdb..b737a0321 100644 --- a/unit_test/unit_test.cc +++ b/unit_test/unit_test.cc @@ -169,6 +169,9 @@ static int TestCpuEnv(int cpu_info) { if (TestEnv("LIBYUV_DISABLE_AMXINT8")) { cpu_info &= ~libyuv::kCpuHasAMXINT8; } + if (TestEnv("LIBYUV_DISABLE_AVX512BMM")) { + cpu_info &= ~libyuv::kCpuHasAVX512BMM; + } #endif if (TestEnv("LIBYUV_DISABLE_ASM")) { cpu_info = libyuv::kCpuInitialized; diff --git a/unit_test/unit_test.h b/unit_test/unit_test.h index e9a55c62f..2c11c983f 100644 --- a/unit_test/unit_test.h +++ b/unit_test/unit_test.h @@ -85,11 +85,10 @@ static inline bool SizeValid(int src_width, #define align_buffer_page_end_16(var, size) \ uint16_t* var = NULL; \ uint8_t* var##_mem = \ - reinterpret_cast(malloc(((size) * 2 + 4095 + 63) & ~4095)); \ + reinterpret_cast(malloc(((size)*2 + 4095 + 63) & ~4095)); \ if (var##_mem) \ var = reinterpret_cast( \ - (intptr_t)(var##_mem + (((size) * 2 + 4095 + 63) & ~4095) - \ - (size) * 2) & \ + (intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \ ~63) #define free_aligned_buffer_page_end_16(var) \ diff --git a/unit_test/video_common_test.cc b/unit_test/video_common_test.cc index 9ff99faac..36728ea90 100644 --- a/unit_test/video_common_test.cc +++ b/unit_test/video_common_test.cc @@ -36,77 +36,77 @@ static bool TestValidFourCC(uint32_t fourcc, int bpp) { } TEST_F(LibYUVBaseTest, 
TestCanonicalFourCC) { - ASSERT_EQ(static_cast(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); - ASSERT_EQ(static_cast(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); - ASSERT_EQ(static_cast(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); - ASSERT_EQ(static_cast(FOURCC_I444), CanonicalFourCC(FOURCC_YU24)); - ASSERT_EQ(static_cast(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); - ASSERT_EQ(static_cast(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); - ASSERT_EQ(static_cast(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); - ASSERT_EQ(static_cast(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); - ASSERT_EQ(static_cast(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); - ASSERT_EQ(static_cast(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); - ASSERT_EQ(static_cast(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); - ASSERT_EQ(static_cast(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3)); - ASSERT_EQ(static_cast(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); - ASSERT_EQ(static_cast(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); - ASSERT_EQ(static_cast(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); - ASSERT_EQ(static_cast(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); - ASSERT_EQ(static_cast(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); + EXPECT_EQ(static_cast(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV)); + EXPECT_EQ(static_cast(FOURCC_I420), CanonicalFourCC(FOURCC_YU12)); + EXPECT_EQ(static_cast(FOURCC_I422), CanonicalFourCC(FOURCC_YU16)); + EXPECT_EQ(static_cast(FOURCC_I444), CanonicalFourCC(FOURCC_YU24)); + EXPECT_EQ(static_cast(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV)); + EXPECT_EQ(static_cast(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS)); + EXPECT_EQ(static_cast(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC)); + EXPECT_EQ(static_cast(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY)); + EXPECT_EQ(static_cast(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG)); + EXPECT_EQ(static_cast(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1)); + EXPECT_EQ(static_cast(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3)); + EXPECT_EQ(static_cast(FOURCC_24BG), 
CanonicalFourCC(FOURCC_BGR3)); + EXPECT_EQ(static_cast(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32)); + EXPECT_EQ(static_cast(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24)); + EXPECT_EQ(static_cast(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555)); + EXPECT_EQ(static_cast(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565)); + EXPECT_EQ(static_cast(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551)); } TEST_F(LibYUVBaseTest, TestFourCC) { - ASSERT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); - ASSERT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); - ASSERT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422)); - ASSERT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444)); - ASSERT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400)); - ASSERT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21)); - ASSERT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2)); - ASSERT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY)); - ASSERT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); // deprecated. - ASSERT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated. 
- ASSERT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB)); - ASSERT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); - ASSERT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); - ASSERT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30)); - ASSERT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30)); - ASSERT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64)); - ASSERT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64)); - ASSERT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); - ASSERT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); - ASSERT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); - ASSERT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); - ASSERT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); - ASSERT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); - ASSERT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); - ASSERT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422)); - ASSERT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); - ASSERT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210)); - ASSERT_TRUE(TestValidFourCC(FOURCC_I010, FOURCC_BPP_I010)); - ASSERT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210)); - ASSERT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010)); - ASSERT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210)); - ASSERT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YV24, FOURCC_BPP_YV24)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YU12, FOURCC_BPP_YU12)); - ASSERT_TRUE(TestValidFourCC(FOURCC_IYUV, FOURCC_BPP_IYUV)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YU16, FOURCC_BPP_YU16)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YU24, FOURCC_BPP_YU24)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YUYV, FOURCC_BPP_YUYV)); - ASSERT_TRUE(TestValidFourCC(FOURCC_YUVS, FOURCC_BPP_YUVS)); - ASSERT_TRUE(TestValidFourCC(FOURCC_HDYC, FOURCC_BPP_HDYC)); - 
ASSERT_TRUE(TestValidFourCC(FOURCC_2VUY, FOURCC_BPP_2VUY)); - ASSERT_TRUE(TestValidFourCC(FOURCC_JPEG, FOURCC_BPP_JPEG)); - ASSERT_TRUE(TestValidFourCC(FOURCC_DMB1, FOURCC_BPP_DMB1)); - ASSERT_TRUE(TestValidFourCC(FOURCC_BA81, FOURCC_BPP_BA81)); - ASSERT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3)); - ASSERT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3)); - ASSERT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264)); - ASSERT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400)); + EXPECT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21)); + EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2)); + EXPECT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); // deprecated. + EXPECT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated. 
+ EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA)); + EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64)); + EXPECT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64)); + EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO)); + EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I010, FOURCC_BPP_I010)); + EXPECT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210)); + EXPECT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010)); + EXPECT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210)); + EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YV24, FOURCC_BPP_YV24)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU12, FOURCC_BPP_YU12)); + EXPECT_TRUE(TestValidFourCC(FOURCC_IYUV, FOURCC_BPP_IYUV)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU16, FOURCC_BPP_YU16)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YU24, FOURCC_BPP_YU24)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUYV, FOURCC_BPP_YUYV)); + EXPECT_TRUE(TestValidFourCC(FOURCC_YUVS, FOURCC_BPP_YUVS)); + EXPECT_TRUE(TestValidFourCC(FOURCC_HDYC, FOURCC_BPP_HDYC)); + 
EXPECT_TRUE(TestValidFourCC(FOURCC_2VUY, FOURCC_BPP_2VUY)); + EXPECT_TRUE(TestValidFourCC(FOURCC_JPEG, FOURCC_BPP_JPEG)); + EXPECT_TRUE(TestValidFourCC(FOURCC_DMB1, FOURCC_BPP_DMB1)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BA81, FOURCC_BPP_BA81)); + EXPECT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3)); + EXPECT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3)); + EXPECT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264)); + EXPECT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY)); } } // namespace libyuv diff --git a/util/cpuid.c b/util/cpuid.c index 38b2c0e9d..bbaea8398 100644 --- a/util/cpuid.c +++ b/util/cpuid.c @@ -15,6 +15,8 @@ #ifdef __linux__ #include #include +#include +#include #endif #include "libyuv/cpu_id.h" @@ -40,6 +42,14 @@ static void KernelVersion(int* version) { } #endif +#ifdef __linux__ +static sigjmp_buf vdpphps_jmpbuf; +static void vdpphps_sigill_handler(int sig) { + (void)sig; + siglongjmp(vdpphps_jmpbuf, 1); +} +#endif + int main(int argc, const char* argv[]) { (void)argc; (void)argv; @@ -182,6 +192,7 @@ int main(int argc, const char* argv[]) { int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI); int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8); int has_amxint8 = TestCpuFlag(kCpuHasAMXINT8); + int has_avx512bmm = TestCpuFlag(kCpuHasAVX512BMM); printf("Has X86 0x%x\n", has_x86); printf("Has SSE2 0x%x\n", has_sse2); printf("Has SSSE3 0x%x\n", has_ssse3); @@ -204,6 +215,30 @@ int main(int argc, const char* argv[]) { printf("HAS AVXVNNI 0x%x\n", has_avxvnni); printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); printf("Has AMXINT8 0x%x\n", has_amxint8); + printf("Has AVX512BMM 0x%x\n", has_avx512bmm); + +#ifdef __linux__ + // Test VDPPHPS instruction + { + struct sigaction act, oldact; + memset(&act, 0, sizeof(act)); + act.sa_handler = vdpphps_sigill_handler; + sigaction(SIGILL, &act, &oldact); + + printf("Testing VDPPHPS instruction... 
"); + fflush(stdout); + + if (sigsetjmp(vdpphps_jmpbuf, 1) == 0) { + // VDPPHPS xmm0, xmm0, xmm0 + __asm__ volatile("vdpphps %%xmm0, %%xmm0, %%xmm0" : : : "xmm0"); + printf("Works!\n"); + } else { + printf("Crashed (SIGILL)!\n"); + } + + sigaction(SIGILL, &oldact, NULL); + } +#endif } #endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || // defined(_M_X64) diff --git a/util/ssim.cc b/util/ssim.cc index f8b4509f8..096fbcf06 100644 --- a/util/ssim.cc +++ b/util/ssim.cc @@ -244,23 +244,23 @@ double GetSSIMFullKernel(const uint8_t* org, // Read 8 pixels at line #L, and convert to 16bit, perform weighting // and acccumulate. -#define LOAD_LINE_PAIR(L, WEIGHT) \ - do { \ - const __m128i v0 = \ - _mm_loadl_epi64(reinterpret_cast(org + (L) * stride)); \ - const __m128i v1 = \ - _mm_loadl_epi64(reinterpret_cast(rec + (L) * stride)); \ - const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \ - const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \ - const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \ - const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \ - x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \ - y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \ - x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \ - y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \ - xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \ - xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \ - yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \ +#define LOAD_LINE_PAIR(L, WEIGHT) \ + do { \ + const __m128i v0 = \ + _mm_loadl_epi64(reinterpret_cast(org + (L)*stride)); \ + const __m128i v1 = \ + _mm_loadl_epi64(reinterpret_cast(rec + (L)*stride)); \ + const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \ + const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \ + const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \ + const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \ + x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \ + y = 
_mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \ + x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \ + y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \ + xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \ + xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \ + yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \ } while (0) #define ADD_AND_STORE_FOUR_EPI32(M, OUT) \