Add AVX512BMM detection and vdpphps to util/cpuid

Bug: None
Change-Id: I9954f96a74e653e3ecd3fbeba533299fa8e57d95
This commit is contained in:
Frank Barchard 2026-06-09 11:30:45 -07:00
parent 3bdb3b94ca
commit ca577883ae
69 changed files with 8440 additions and 8659 deletions

View File

@ -1,7 +1,4 @@
# This is the Android makefile for libyuv for NDK.
# Ignore this file during non-NDK builds.
ifdef NDK_ROOT
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
@ -107,4 +104,3 @@ LOCAL_SRC_FILES := \
LOCAL_MODULE := libyuv_unittest
include $(BUILD_NATIVE_TEST)
endif # NDK_ROOT

View File

@ -22,6 +22,13 @@ declare_args() {
config("libyuv_config") {
include_dirs = [ "include" ]
if (is_android) {
if (target_cpu == "arm" || target_cpu == "x86") {
ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker" ]
} else {
ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ]
}
}
# Define CHROMIUM to tell cpu_id to avoid sandbox-unsafe system calls.
defines = [ "CHROMIUM" ]

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: https://chromium.googlesource.com/libyuv/libyuv/
Version: 1948
Version: 1937
Revision: DEPS
License: BSD-3-Clause
License File: LICENSE

View File

@ -33,6 +33,7 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
LIBYUV_DISABLE_AVXVNNI
LIBYUV_DISABLE_AVXVNNIINT8
LIBYUV_DISABLE_AMXINT8
LIBYUV_DISABLE_AVX512BMM
## Arm CPUs

View File

@ -72,6 +72,7 @@ Additional commonly used compiler options can be passed to Bazel via `--copt`:
bazel build -c opt --config=android_arm64 \
--copt=-DLIBYUV_UNLIMITED_DATA \
--copt=-DLIBYUV_BIT_EXACT=1 \
--copt=-DENABLE_ROW_TESTS \
//:libyuv_test

View File

@ -888,45 +888,6 @@ int ABGRToI420(const uint8_t* src_abgr,
int width,
int height);
// BGRA little endian (argb in memory) to I422.
LIBYUV_API
int BGRAToI422(const uint8_t* src_bgra,
int src_stride_bgra,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// ABGR little endian (rgba in memory) to I422.
LIBYUV_API
int ABGRToI422(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGBA little endian (abgr in memory) to I422.
LIBYUV_API
int RGBAToI422(const uint8_t* src_rgba,
int src_stride_rgba,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
int RGBAToI420(const uint8_t* src_rgba,

View File

@ -245,19 +245,6 @@ int ARGBToI422(const uint8_t* src_argb,
int width,
int height);
// Convert ABGR To I422.
LIBYUV_API
int ABGRToI422(const uint8_t* src_abgr,
int src_stride_abgr,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_u,
int dst_stride_u,
uint8_t* dst_v,
int dst_stride_v,
int width,
int height);
// RGB to I422 with matrix. See ArgbConstants at the top of this file for usage.
LIBYUV_API
int ARGBToI422Matrix(const uint8_t* src_argb,

View File

@ -60,6 +60,7 @@ static const int kCpuHasAVX10_2 = 0x2000000;
static const int kCpuHasAVXVNNI = 0x4000000;
static const int kCpuHasAVXVNNIINT8 = 0x8000000;
static const int kCpuHasAMXINT8 = 0x10000000;
static const int kCpuHasAVX512BMM = 0x20000000;
// These flags are only valid on LOONGARCH processors.
static const int kCpuHasLOONGARCH = 0x20;

File diff suppressed because it is too large Load Diff

View File

@ -631,8 +631,8 @@ static inline void I422ToRGB565Row_SVE_SC(
// Calculate a predicate for the final iteration to deal with the tail.
"cnth %[vl] \n"
"whilelt p1.b, wzr, %w[width] \n" //
READYUV422_SVE_2X I422TORGB_SVE_2X
RGBTOARGB8_SVE_TOP_2X RGB8TORGB565_SVE_FROM_TOP_2X
READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
RGB8TORGB565_SVE_FROM_TOP_2X
// Need to permute the data on the final iteration such that the
// predicates (.b) line up with the 16-bit element data.
"trn1 z20.b, z18.b, z19.b \n"
@ -694,8 +694,8 @@ static inline void I422ToARGB1555Row_SVE_SC(
// Calculate a predicate for the final iteration to deal with the tail.
"cnth %[vl] \n"
"whilelt p1.b, wzr, %w[width] \n" //
READYUV422_SVE_2X I422TORGB_SVE_2X
RGBTOARGB8_SVE_TOP_2X RGB8TOARGB1555_SVE_FROM_TOP_2X
READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
RGB8TOARGB1555_SVE_FROM_TOP_2X
"st2h {z0.h, z1.h}, p1, [%[dst]] \n"
"99: \n"
@ -753,8 +753,8 @@ static inline void I422ToARGB4444Row_SVE_SC(
// Calculate a predicate for the final iteration to deal with the tail.
"cnth %[vl] \n"
"whilelt p1.b, wzr, %w[width] \n" //
READYUV422_SVE_2X I422TORGB_SVE_2X
RGBTOARGB8_SVE_TOP_2X RGB8TOARGB4444_SVE_FROM_TOP_2X
READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
RGB8TOARGB4444_SVE_FROM_TOP_2X
"st2h {z0.h, z1.h}, p1, [%[dst]] \n"
"99: \n"

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1948
#define LIBYUV_VERSION 1937
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -122,6 +122,18 @@
'include',
'.',
],
'conditions': [
['OS == "android" and target_arch == "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker64',
],
}],
['OS == "android" and target_arch != "arm64"', {
'ldflags': [
'-Wl,--dynamic-linker,/system/bin/linker',
],
}],
], #conditions
},
'sources': [
'<@(libyuv_sources)',

View File

@ -69,7 +69,6 @@
'source/row_lsx.cc',
'source/row_neon.cc',
'source/row_neon64.cc',
'source/row_rvv.cc',
'source/row_win.cc',
'source/scale.cc',
'source/scale_any.cc',
@ -80,7 +79,6 @@
'source/scale_neon.cc',
'source/scale_neon64.cc',
'source/scale_rgb.cc',
'source/scale_rvv.cc',
'source/scale_uv.cc',
'source/scale_win.cc',
'source/video_common.cc',

View File

@ -11,7 +11,6 @@
#include "libyuv/compare.h"
#include <float.h>
#include <limits.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
@ -107,11 +106,8 @@ uint32_t ARGBDetect(const uint8_t* argb,
uint32_t fourcc = 0;
int h;
if (!argb || width <= 0 || height <= 0) {
return fourcc;
}
// Coalesce rows.
if (stride_argb == width * 4 && (ptrdiff_t)width * height <= INT_MAX) {
if (stride_argb == width * 4) {
width *= height;
height = 1;
stride_argb = 0;
@ -249,12 +245,8 @@ uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
int height) {
uint64_t sse = 0;
int h;
if (!src_a || !src_b || width <= 0 || height <= 0) {
return sse;
}
// Coalesce rows.
if (stride_a == width && stride_b == width &&
(ptrdiff_t)width * height <= INT_MAX) {
if (stride_a == width && stride_b == width) {
width *= height;
height = 1;
stride_a = stride_b = 0;

View File

@ -116,7 +116,7 @@ uint32_t HashDjb2_NEON(const uint8_t* src, int count, uint32_t seed) {
uint32_t hash = seed;
const uint32_t c16 = 0x92d9e201; // 33^16
uint32_t tmp, tmp2;
asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
asm("ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[kIdx]] \n"
"ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[kMuls]] \n"
// count is always a multiple of 16.

View File

@ -41,9 +41,8 @@ uint32_t HammingDistance_SSE42(const uint8_t* src_a,
return diff;
}
__declspec(naked) uint32_t SumSquareError_SSE2(const uint8_t* src_a,
const uint8_t* src_b,
int count) {
__declspec(naked) uint32_t
SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@ -82,9 +81,8 @@ __declspec(naked) uint32_t SumSquareError_SSE2(const uint8_t* src_a,
#ifdef HAS_SUMSQUAREERROR_AVX2
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable : 4752)
__declspec(naked) uint32_t SumSquareError_AVX2(const uint8_t* src_a,
const uint8_t* src_b,
int count) {
__declspec(naked) uint32_t
SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
@ -148,9 +146,8 @@ uvec32 kHashMul3 = {
0x00000001, // 33 ^ 0
};
__declspec(naked) uint32_t HashDjb2_SSE41(const uint8_t* src,
int count,
uint32_t seed) {
__declspec(naked) uint32_t
HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
@ -200,9 +197,8 @@ __declspec(naked) uint32_t HashDjb2_SSE41(const uint8_t* src,
// Visual C 2012 required for AVX2.
#ifdef HAS_HASHDJB2_AVX2
__declspec(naked) uint32_t HashDjb2_AVX2(const uint8_t* src,
int count,
uint32_t seed) {
__declspec(naked) uint32_t
HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -10,8 +10,6 @@
#include "libyuv/convert_from.h"
#include <limits.h>
#include "libyuv/basic_types.h"
#include "libyuv/convert.h" // For I420Copy
#include "libyuv/cpu_id.h"
@ -89,16 +87,16 @@ int I420ToI010(const uint8_t* src_y,
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -134,16 +132,16 @@ int I420ToI012(const uint8_t* src_y,
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -228,7 +226,7 @@ int I010ToI410(const uint16_t* src_y,
int height) {
int r;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
@ -265,7 +263,7 @@ int I210ToI410(const uint16_t* src_y,
int height) {
int r;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
@ -301,7 +299,7 @@ int I422ToI444(const uint8_t* src_y,
int height) {
int r;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
@ -326,7 +324,7 @@ int I400Copy(const uint8_t* src_y,
int dst_stride_y,
int width,
int height) {
if (!src_y || !dst_y || width <= 0 || height == 0 || height == INT_MIN) {
if (!src_y || !dst_y || width <= 0 || height == 0) {
return -1;
}
CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
@ -348,20 +346,18 @@ int I422ToYUY2(const uint8_t* src_y,
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
I422ToYUY2Row_C;
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0 ||
height == INT_MIN) {
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_yuy2 = dst_yuy2 + (ptrdiff_t)(height - 1) * dst_stride_yuy2;
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2;
}
// Coalesce rows.
if (src_stride_y == width && src_stride_u * 2 == width &&
src_stride_v * 2 == width && dst_stride_yuy2 == width * 2 &&
(ptrdiff_t)width * height <= INT_MAX) {
src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0;
@ -416,14 +412,13 @@ int I420ToYUY2(const uint8_t* src_y,
void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u,
const uint8_t* src_v, uint8_t* dst_yuy2, int width) =
I422ToYUY2Row_C;
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0 ||
height == INT_MIN) {
if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_yuy2 = dst_yuy2 + (ptrdiff_t)(height - 1) * dst_stride_yuy2;
dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
dst_stride_yuy2 = -dst_stride_yuy2;
}
#if defined(HAS_I422TOYUY2ROW_SSE2)
@ -497,20 +492,18 @@ int I422ToUYVY(const uint8_t* src_y,
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
I422ToUYVYRow_C;
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0 ||
height == INT_MIN) {
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_uyvy = dst_uyvy + (ptrdiff_t)(height - 1) * dst_stride_uyvy;
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
dst_stride_uyvy = -dst_stride_uyvy;
}
// Coalesce rows.
if (src_stride_y == width && src_stride_u * 2 == width &&
src_stride_v * 2 == width && dst_stride_uyvy == width * 2 &&
(ptrdiff_t)width * height <= INT_MAX) {
src_stride_v * 2 == width && dst_stride_uyvy == width * 2) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0;
@ -581,14 +574,13 @@ int I420ToUYVY(const uint8_t* src_y,
void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u,
const uint8_t* src_v, uint8_t* dst_uyvy, int width) =
I422ToUYVYRow_C;
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0 ||
height == INT_MIN) {
if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_uyvy = dst_uyvy + (ptrdiff_t)(height - 1) * dst_stride_uyvy;
dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy;
dst_stride_uyvy = -dst_stride_uyvy;
}
#if defined(HAS_I422TOUYVYROW_SSE2)
@ -663,16 +655,16 @@ int I420ToNV12(const uint8_t* src_y,
int halfwidth = (width + 1) / 2;
int halfheight = (height + 1) / 2;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_uv || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -718,8 +710,7 @@ int ConvertFromI420(const uint8_t* y,
uint32_t fourcc) {
uint32_t format = CanonicalFourCC(fourcc);
int r = 0;
if (!y || !u || !v || !dst_sample || width <= 0 || height == 0 ||
height == INT_MIN) {
if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) {
return -1;
}
switch (format) {
@ -791,7 +782,7 @@ int ConvertFromI420(const uint8_t* y,
break;
case FOURCC_NV12: {
int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
uint8_t* dst_uv = dst_sample + (ptrdiff_t)dst_y_stride * height;
uint8_t* dst_uv = dst_sample + dst_y_stride * height;
r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width, dst_uv,
dst_sample_stride ? dst_sample_stride : width, width,
@ -800,7 +791,7 @@ int ConvertFromI420(const uint8_t* y,
}
case FOURCC_NV21: {
int dst_y_stride = dst_sample_stride ? dst_sample_stride : width;
uint8_t* dst_vu = dst_sample + (ptrdiff_t)dst_y_stride * height;
uint8_t* dst_vu = dst_sample + dst_y_stride * height;
r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride ? dst_sample_stride : width, dst_vu,
dst_sample_stride ? dst_sample_stride : width, width,
@ -816,11 +807,11 @@ int ConvertFromI420(const uint8_t* y,
uint8_t* dst_u;
uint8_t* dst_v;
if (format == FOURCC_YV12) {
dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
dst_u = dst_v + (ptrdiff_t)halfstride * halfheight;
dst_v = dst_sample + dst_sample_stride * height;
dst_u = dst_v + halfstride * halfheight;
} else {
dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
dst_v = dst_u + (ptrdiff_t)halfstride * halfheight;
dst_u = dst_sample + dst_sample_stride * height;
dst_v = dst_u + halfstride * halfheight;
}
r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
@ -834,11 +825,11 @@ int ConvertFromI420(const uint8_t* y,
uint8_t* dst_u;
uint8_t* dst_v;
if (format == FOURCC_YV16) {
dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
dst_u = dst_v + (ptrdiff_t)halfstride * height;
dst_v = dst_sample + dst_sample_stride * height;
dst_u = dst_v + halfstride * height;
} else {
dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
dst_v = dst_u + (ptrdiff_t)halfstride * height;
dst_u = dst_sample + dst_sample_stride * height;
dst_v = dst_u + halfstride * height;
}
r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride, dst_u, halfstride, dst_v, halfstride,
@ -851,11 +842,11 @@ int ConvertFromI420(const uint8_t* y,
uint8_t* dst_u;
uint8_t* dst_v;
if (format == FOURCC_YV24) {
dst_v = dst_sample + (ptrdiff_t)dst_sample_stride * height;
dst_u = dst_v + (ptrdiff_t)dst_sample_stride * height;
dst_v = dst_sample + dst_sample_stride * height;
dst_u = dst_v + dst_sample_stride * height;
} else {
dst_u = dst_sample + (ptrdiff_t)dst_sample_stride * height;
dst_v = dst_u + (ptrdiff_t)dst_sample_stride * height;
dst_u = dst_sample + dst_sample_stride * height;
dst_v = dst_u + dst_sample_stride * height;
}
r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample,
dst_sample_stride, dst_u, dst_sample_stride, dst_v,

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,6 @@
#include "libyuv/convert_argb.h"
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
@ -51,26 +50,12 @@ int ConvertToARGB(const uint8_t* sample,
int crop_height,
enum RotationMode rotation,
uint32_t fourcc) {
if (src_height == INT_MIN || crop_height == INT_MIN) {
return -1;
}
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (dst_argb == NULL || sample == NULL || src_width <= 0 ||
src_width > INT_MAX / 4 || crop_width <= 0 || crop_width > INT_MAX / 4 ||
src_height == 0 || crop_height == 0 || crop_x < 0 || crop_y < 0 ||
crop_width > src_width || crop_x > src_width - crop_width ||
abs_crop_height > abs_src_height ||
crop_y > abs_src_height - abs_crop_height) {
return -1;
}
uint32_t format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
const uint8_t* src;
const uint8_t* src_uv;
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
// One pass rotation is available for some formats. For the rest, convert
@ -83,8 +68,13 @@ int ConvertToARGB(const uint8_t* sample,
uint8_t* dest_argb = dst_argb;
int dest_dst_stride_argb = dst_stride_argb;
uint8_t* rotate_buffer = NULL;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (dst_argb == NULL || sample == NULL || src_width <= 0 ||
src_width > INT_MAX / 4 || crop_width <= 0 || crop_width > INT_MAX / 4 ||
src_height == 0 || crop_height == 0) {
return -1;
}
if (src_height < 0) {
inv_crop_height = -inv_crop_height;
}
@ -106,97 +96,95 @@ int ConvertToARGB(const uint8_t* sample,
switch (format) {
// Single plane formats
case FOURCC_YUY2:
src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2;
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_UYVY:
src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2;
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_24BG:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3;
src = sample + (src_width * crop_y + crop_x) * 3;
r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_RAW:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3;
src = sample + (src_width * crop_y + crop_x) * 3;
r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_ARGB:
if (!need_buf && !rotation) {
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
}
break;
case FOURCC_BGRA:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_AR30:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_AB30:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_RGBP:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2;
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2;
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_R444:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2;
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + (ptrdiff_t)src_width * crop_y + crop_x;
src = sample + src_width * crop_y + crop_x;
r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
case FOURCC_J400:
src = sample + (ptrdiff_t)src_width * crop_y + crop_x;
src = sample + src_width * crop_y + crop_x;
r = J400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
inv_crop_height);
break;
// Biplanar formats
case FOURCC_NV12:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
src_uv = sample +
aligned_src_width * ((ptrdiff_t)abs_src_height + crop_y / 2) +
crop_x;
src = sample + (src_width * crop_y + crop_x);
src_uv =
sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
dst_stride_argb, crop_width, inv_crop_height);
break;
case FOURCC_NV21:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
src_uv = sample +
aligned_src_width * ((ptrdiff_t)abs_src_height + crop_y / 2) +
crop_x;
src = sample + (src_width * crop_y + crop_x);
src_uv =
sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
// Call NV12 but with u and v parameters swapped.
r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
dst_stride_argb, crop_width, inv_crop_height);
@ -204,21 +192,21 @@ int ConvertToARGB(const uint8_t* sample,
// Triplanar formats
case FOURCC_I420:
case FOURCC_YV12: {
const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
const uint8_t* src_u;
const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
((ptrdiff_t)halfwidth * crop_y + crop_x) / 2;
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + crop_x / 2;
src_v = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
} else {
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
((ptrdiff_t)halfwidth * crop_y + crop_x) / 2;
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) + crop_x / 2;
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
}
r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
@ -228,12 +216,11 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_J420: {
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height +
((ptrdiff_t)halfwidth * crop_y + crop_x) / 2;
const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) +
crop_x / 2;
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
const uint8_t* src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
const uint8_t* src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -242,12 +229,11 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_H420: {
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height +
((ptrdiff_t)halfwidth * crop_y + crop_x) / 2;
const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) +
crop_x / 2;
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
const uint8_t* src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
const uint8_t* src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = H420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -256,12 +242,11 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_U420: {
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height +
((ptrdiff_t)halfwidth * crop_y + crop_x) / 2;
const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + crop_y / 2) +
crop_x / 2;
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
const uint8_t* src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
const uint8_t* src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
r = U420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -270,19 +255,19 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_I422:
case FOURCC_YV16: {
int halfwidth = (src_width + 1) / 2;
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
if (format == FOURCC_YV16) {
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + crop_x / 2;
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + crop_x / 2;
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
crop_x / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
} else {
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + crop_x / 2;
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + crop_x / 2;
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
crop_x / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
}
r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
@ -291,12 +276,11 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_J422: {
int halfwidth = (src_width + 1) / 2;
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + crop_x / 2;
const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) +
crop_x / 2;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u =
sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
const uint8_t* src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
r = J422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -304,12 +288,11 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_H422: {
int halfwidth = (src_width + 1) / 2;
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + crop_x / 2;
const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) +
crop_x / 2;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u =
sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
const uint8_t* src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -317,12 +300,11 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_U422: {
int halfwidth = (src_width + 1) / 2;
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_u = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + crop_x / 2;
const uint8_t* src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) +
crop_x / 2;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u =
sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
const uint8_t* src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -330,19 +312,15 @@ int ConvertToARGB(const uint8_t* sample,
case FOURCC_I444:
case FOURCC_YV24: {
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
if (format == FOURCC_YV24) {
src_v =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
} else {
src_u =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
@ -350,36 +328,33 @@ int ConvertToARGB(const uint8_t* sample,
}
case FOURCC_J444: {
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_u =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
const uint8_t* src_v =
sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
r = J444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
}
case FOURCC_H444: {
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_u =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
const uint8_t* src_v =
sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
r = H444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
}
case FOURCC_U444: {
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_u =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
const uint8_t* src_v =
sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
r = U444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
break;
@ -402,7 +377,7 @@ int ConvertToARGB(const uint8_t* sample,
}
free(rotate_buffer);
} else if (rotation) {
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
inv_crop_height, rotation);
}

View File

@ -44,24 +44,12 @@ int ConvertToI420(const uint8_t* sample,
int crop_height,
enum RotationMode rotation,
uint32_t fourcc) {
if (src_height == INT_MIN || crop_height == INT_MIN) {
return -1;
}
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
src_width > INT_MAX / 4 || crop_width <= 0 || src_height == 0 ||
crop_height == 0 || crop_x < 0 || crop_y < 0 || crop_width > src_width ||
crop_x > src_width - crop_width || abs_crop_height > abs_src_height ||
crop_y > abs_src_height - abs_crop_height) {
return -1;
}
uint32_t format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
const uint8_t* src;
const uint8_t* src_uv;
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
LIBYUV_BOOL need_buf =
(rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
@ -76,7 +64,12 @@ int ConvertToI420(const uint8_t* sample,
uint8_t* rotate_buffer = NULL;
const int inv_crop_height =
(src_height < 0) ? -abs_crop_height : abs_crop_height;
int aligned_src_width = (src_width + 1) & ~1;
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
src_width > INT_MAX / 4 || crop_width <= 0 || src_height == 0 ||
crop_height == 0) {
return -1;
}
// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
@ -84,14 +77,14 @@ int ConvertToI420(const uint8_t* sample,
// For in-place conversion, if destination dst_y is same as source sample,
// also enable temporary buffer.
if (need_buf) {
size_t y_size = (size_t)crop_width * abs_crop_height;
size_t uv_size =
(size_t)((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
if (uv_size > SIZE_MAX / 2 || y_size > SIZE_MAX - uv_size * 2) {
int y_size = crop_width * abs_crop_height;
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
const uint64_t rotate_buffer_size =
(uint64_t)y_size + (uint64_t)uv_size * 2;
if (rotate_buffer_size > SIZE_MAX) {
return -1; // Invalid size.
}
const size_t rotate_buffer_size = y_size + uv_size * 2;
rotate_buffer = (uint8_t*)malloc(rotate_buffer_size);
rotate_buffer = (uint8_t*)malloc((size_t)rotate_buffer_size);
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
@ -109,7 +102,7 @@ int ConvertToI420(const uint8_t* sample,
uint8_t* v = (crop_x & 1) ? dst_u : dst_v;
int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u;
int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v;
src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2;
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u,
stride_u, v, stride_v, crop_width, inv_crop_height);
break;
@ -119,86 +112,84 @@ int ConvertToI420(const uint8_t* sample,
uint8_t* v = (crop_x & 1) ? dst_u : dst_v;
int stride_u = (crop_x & 1) ? dst_stride_v : dst_stride_u;
int stride_v = (crop_x & 1) ? dst_stride_u : dst_stride_v;
src = sample + ((ptrdiff_t)aligned_src_width * crop_y + crop_x) * 2;
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, u,
stride_u, v, stride_v, crop_width, inv_crop_height);
break;
}
case FOURCC_RGBP:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2;
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2;
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_R444:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 2;
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_24BG:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3;
src = sample + (src_width * crop_y + crop_x) * 3;
r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_RAW:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 3;
src = sample + (src_width * crop_y + crop_x) * 3;
r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_ARGB:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_BGRA:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x) * 4;
src = sample + (src_width * crop_y + crop_x) * 4;
r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
dst_stride_u, dst_v, dst_stride_v, crop_width,
inv_crop_height);
break;
// TODO(fbarchard): Add AR30 and AB30
case FOURCC_I400:
src = sample + (ptrdiff_t)src_width * crop_y + crop_x;
src = sample + src_width * crop_y + crop_x;
r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
dst_v, dst_stride_v, crop_width, inv_crop_height);
break;
// Biplanar formats
case FOURCC_NV12:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
src_uv = sample + ((ptrdiff_t)src_width * abs_src_height) +
((ptrdiff_t)(crop_y / 2) * aligned_src_width) +
((crop_x / 2) * 2);
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + (src_width * abs_src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
dst_stride_y, dst_u, dst_stride_u, dst_v,
dst_stride_v, crop_width, inv_crop_height, rotation);
break;
case FOURCC_NV21:
src = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
src_uv = sample + ((ptrdiff_t)src_width * abs_src_height) +
((ptrdiff_t)(crop_y / 2) * aligned_src_width) +
((crop_x / 2) * 2);
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + (src_width * abs_src_height) +
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
// Call NV12 but with dst_u and dst_v parameters swapped.
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
dst_stride_y, dst_v, dst_stride_v, dst_u,
@ -207,23 +198,21 @@ int ConvertToI420(const uint8_t* sample,
// Triplanar formats
case FOURCC_I420:
case FOURCC_YV12: {
const uint8_t* src_y = sample + ((ptrdiff_t)src_width * crop_y + crop_x);
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
const uint8_t* src_u;
const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * (crop_y / 2) + (crop_x / 2);
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + (crop_y / 2)) +
src_v = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) +
(crop_x / 2);
src_u = sample + src_width * abs_src_height +
halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2);
} else {
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * (crop_y / 2) + (crop_x / 2);
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)halfheight + (crop_y / 2)) +
src_u = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) +
(crop_x / 2);
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2);
}
r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
@ -232,20 +221,20 @@ int ConvertToI420(const uint8_t* sample,
}
case FOURCC_I422:
case FOURCC_YV16: {
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
int halfwidth = (src_width + 1) / 2;
if (format == FOURCC_YV16) {
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + (crop_x / 2);
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + (crop_x / 2);
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
(crop_x / 2);
src_u = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + (crop_x / 2);
} else {
src_u = sample + (ptrdiff_t)src_width * abs_src_height +
(ptrdiff_t)halfwidth * crop_y + (crop_x / 2);
src_v = sample + (ptrdiff_t)src_width * abs_src_height +
halfwidth * ((ptrdiff_t)abs_src_height + crop_y) + (crop_x / 2);
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
(crop_x / 2);
src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + (crop_x / 2);
}
r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
@ -254,19 +243,15 @@ int ConvertToI420(const uint8_t* sample,
}
case FOURCC_I444:
case FOURCC_YV24: {
const uint8_t* src_y = sample + (ptrdiff_t)src_width * crop_y + crop_x;
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
const uint8_t* src_u;
const uint8_t* src_v;
if (format == FOURCC_YV24) {
src_v =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
} else {
src_u =
sample + src_width * ((ptrdiff_t)abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * ((ptrdiff_t)abs_src_height * 2 + crop_y) +
crop_x;
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width,
dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,

View File

@ -397,6 +397,7 @@ static SAFEBUFFERS int GetCpuFlags(void) {
int cpu_info7[4] = {0, 0, 0, 0};
int cpu_einfo7[4] = {0, 0, 0, 0};
int cpu_info24[4] = {0, 0, 0, 0};
int cpu_info21[4] = {0, 0, 0, 0};
int cpu_amdinfo21[4] = {0, 0, 0, 0};
CpuId(0, 0, cpu_info0);
CpuId(1, 0, cpu_info1);
@ -405,6 +406,9 @@ static SAFEBUFFERS int GetCpuFlags(void) {
CpuId(7, 1, cpu_einfo7);
CpuId(0x80000021, 0, cpu_amdinfo21);
}
if (cpu_info0[0] >= 0x21) {
CpuId(0x21, 0, cpu_info21);
}
if (cpu_info0[0] >= 0x24) {
CpuId(0x24, 0, cpu_info24);
}
@ -435,7 +439,8 @@ static SAFEBUFFERS int GetCpuFlags(void) {
((cpu_info7[2] & 0x00000800) ? kCpuHasAVX512VNNI : 0) |
((cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0) |
((cpu_einfo7[3] & 0x00080000) ? kCpuHasAVX10 : 0) |
((cpu_info7[3] & 0x02000000) ? kCpuHasAMXINT8 : 0);
((cpu_info7[3] & 0x02000000) ? kCpuHasAMXINT8 : 0) |
((cpu_info21[0] & 0x00800000) ? kCpuHasAVX512BMM : 0);
if (cpu_info0[0] >= 0x24 && (cpu_einfo7[3] & 0x00080000)) {
cpu_info |= ((cpu_info24[1] & 0xFF) >= 2) ? kCpuHasAVX10_2 : 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -8,10 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include <assert.h>
#include <limits.h>
#include "libyuv/rotate.h"
#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
@ -129,7 +128,7 @@ void RotatePlane90(const uint8_t* src,
// Rotate by 90 is a transpose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += (ptrdiff_t)src_stride * (height - 1);
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
@ -144,7 +143,7 @@ void RotatePlane270(const uint8_t* src,
// Rotate by 270 is a transpose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += (ptrdiff_t)dst_stride * (width - 1);
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
TransposePlane(src, src_stride, dst, dst_stride, width, height);
}
@ -161,8 +160,8 @@ void RotatePlane180(const uint8_t* src,
assert(row);
if (!row)
return;
const uint8_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1);
uint8_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1);
const uint8_t* src_bot = src + src_stride * (height - 1);
uint8_t* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
@ -355,7 +354,7 @@ void SplitRotateUV90(const uint8_t* src,
int dst_stride_b,
int width,
int height) {
src += (ptrdiff_t)src_stride * (height - 1);
src += src_stride * (height - 1);
src_stride = -src_stride;
SplitTransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
@ -398,14 +397,9 @@ void SplitRotateUV180(const uint8_t* src,
MirrorSplitUVRow = MirrorSplitUVRow_NEON;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
MirrorSplitUVRow = MirrorSplitUVRow_AVX2;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW) && IS_ALIGNED(width, 32)) {
MirrorSplitUVRow = MirrorSplitUVRow_AVX512BW;
#if defined(HAS_MIRRORSPLITUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
MirrorSplitUVRow = MirrorSplitUVRow_SSSE3;
}
#endif
#if defined(HAS_MIRRORSPLITUVROW_LSX)
@ -437,15 +431,14 @@ int SplitRotateUV(const uint8_t* src_uv,
int width,
int height,
enum RotationMode mode) {
if (!src_uv || width <= 0 || height == 0 || height == INT_MIN || !dst_u ||
!dst_v) {
if (!src_uv || width <= 0 || height == 0 || !dst_u || !dst_v) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (ptrdiff_t)(height - 1) * src_stride_uv;
src_uv = src_uv + (height - 1) * src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -480,14 +473,14 @@ int RotatePlane(const uint8_t* src,
int width,
int height,
enum RotationMode mode) {
if (!src || width <= 0 || height == 0 || height == INT_MIN || !dst) {
if (!src || width <= 0 || height == 0 || !dst) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src = src + (ptrdiff_t)(height - 1) * src_stride;
src = src + (height - 1) * src_stride;
src_stride = -src_stride;
}
@ -540,7 +533,7 @@ static void RotatePlane90_16(const uint16_t* src,
// Rotate by 90 is a transpose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += (ptrdiff_t)src_stride * (height - 1);
src += src_stride * (height - 1);
src_stride = -src_stride;
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
}
@ -554,7 +547,7 @@ static void RotatePlane270_16(const uint16_t* src,
// Rotate by 270 is a transpose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += (ptrdiff_t)dst_stride * (width - 1);
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
TransposePlane_16(src, src_stride, dst, dst_stride, width, height);
}
@ -565,8 +558,8 @@ static void RotatePlane180_16(const uint16_t* src,
int dst_stride,
int width,
int height) {
const uint16_t* src_bot = src + (ptrdiff_t)src_stride * (height - 1);
uint16_t* dst_bot = dst + (ptrdiff_t)dst_stride * (height - 1);
const uint16_t* src_bot = src + src_stride * (height - 1);
uint16_t* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
@ -598,14 +591,14 @@ int RotatePlane_16(const uint16_t* src,
int width,
int height,
enum RotationMode mode) {
if (!src || width <= 0 || height == 0 || height == INT_MIN || !dst) {
if (!src || width <= 0 || height == 0 || !dst) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src = src + (ptrdiff_t)(height - 1) * src_stride;
src = src + (height - 1) * src_stride;
src_stride = -src_stride;
}
@ -648,7 +641,7 @@ int I420Rotate(const uint8_t* src_y,
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if ((!src_y && dst_y) || !src_u || !src_v || width <= 0 || height == 0 ||
height == INT_MIN || !dst_y || !dst_u || !dst_v) {
!dst_y || !dst_u || !dst_v) {
return -1;
}
@ -656,9 +649,9 @@ int I420Rotate(const uint8_t* src_y,
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -718,16 +711,16 @@ int I422Rotate(const uint8_t* src_y,
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
int r;
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
height == INT_MIN || !dst_y || !dst_u || !dst_v) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -813,17 +806,17 @@ int I444Rotate(const uint8_t* src_y,
int width,
int height,
enum RotationMode mode) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
height == INT_MIN || !dst_y || !dst_u || !dst_v) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -873,8 +866,8 @@ int NV12ToI420Rotate(const uint8_t* src_y,
enum RotationMode mode) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_y || !src_uv || width <= 0 || height == 0 || height == INT_MIN ||
!dst_y || !dst_u || !dst_v) {
if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u ||
!dst_v) {
return -1;
}
@ -882,8 +875,8 @@ int NV12ToI420Rotate(const uint8_t* src_y,
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_uv = src_uv + (ptrdiff_t)(halfheight - 1) * src_stride_uv;
src_y = src_y + (height - 1) * src_stride_y;
src_uv = src_uv + (halfheight - 1) * src_stride_uv;
src_stride_y = -src_stride_y;
src_stride_uv = -src_stride_uv;
}
@ -950,16 +943,16 @@ int Android420ToI420Rotate(const uint8_t* src_y,
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
height == 0 || height == INT_MIN) {
height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(halfheight - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(halfheight - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -1025,16 +1018,16 @@ int I010Rotate(const uint16_t* src_y,
enum RotationMode mode) {
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_stride_y < 0) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v || dst_stride_y < 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -1096,16 +1089,16 @@ int I210Rotate(const uint16_t* src_y,
int halfwidth = (width + 1) >> 1;
int halfheight = (height + 1) >> 1;
int r;
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
height == INT_MIN || !dst_y || !dst_u || !dst_v) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
@ -1193,16 +1186,16 @@ int I410Rotate(const uint16_t* src_y,
int width,
int height,
enum RotationMode mode) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_stride_y < 0) {
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
!dst_u || !dst_v || dst_stride_y < 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (ptrdiff_t)(height - 1) * src_stride_y;
src_u = src_u + (ptrdiff_t)(height - 1) * src_stride_u;
src_v = src_v + (ptrdiff_t)(height - 1) * src_stride_v;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (height - 1) * src_stride_u;
src_v = src_v + (height - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;

View File

@ -10,8 +10,6 @@
#include "libyuv/rotate_argb.h"
#include <limits.h>
#include "libyuv/convert.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"
@ -224,15 +222,14 @@ int ARGBRotate(const uint8_t* src_argb,
int width,
int height,
enum RotationMode mode) {
if (!src_argb || width <= 0 || height == 0 || height == INT_MIN ||
!dst_argb) {
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (ptrdiff_t)(height - 1) * src_stride_argb;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}

View File

@ -191,10 +191,10 @@ void Transpose4x4_32_C(const uint8_t* src,
((uint32_t*)(dst3))[1] = p31;
((uint32_t*)(dst3))[2] = p32;
((uint32_t*)(dst3))[3] = p33;
src += (ptrdiff_t)src_stride * 4; // advance 4 rows
src1 += (ptrdiff_t)src_stride * 4;
src2 += (ptrdiff_t)src_stride * 4;
src3 += (ptrdiff_t)src_stride * 4;
src += src_stride * 4; // advance 4 rows
src1 += src_stride * 4;
src2 += src_stride * 4;
src3 += src_stride * 4;
dst += 4 * 4; // advance 4 columns
dst1 += 4 * 4;
dst2 += 4 * 4;

View File

@ -198,16 +198,16 @@ void Transpose4x4_32_NEON(const uint8_t* src,
"vst1.8 {q3}, [%7]! \n"
"bgt 1b \n"
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(dst1), // %5
"+r"(dst2), // %6
"+r"(dst3), // %7
"+r"(width) // %8
: "r"((ptrdiff_t)src_stride * 4) // %9
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(dst1), // %5
"+r"(dst2), // %6
"+r"(dst3), // %7
"+r"(width) // %8
: "r"((ptrdiff_t)(src_stride * 4)) // %9
: "memory", "cc", "q0", "q1", "q2", "q3");
}

View File

@ -252,16 +252,16 @@ void Transpose4x4_32_NEON(const uint8_t* src,
"st1 {v2.4s}, [%6], 16 \n"
"st1 {v3.4s}, [%7], 16 \n"
"b.gt 1b \n"
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(dst1), // %5
"+r"(dst2), // %6
"+r"(dst3), // %7
"+r"(width) // %8
: "r"((ptrdiff_t)src_stride * 4) // %9
: "+r"(src), // %0
"+r"(src1), // %1
"+r"(src2), // %2
"+r"(src3), // %3
"+r"(dst), // %4
"+r"(dst1), // %5
"+r"(dst2), // %6
"+r"(dst3), // %7
"+r"(width) // %8
: "r"((ptrdiff_t)(src_stride * 4)) // %9
: "memory", "cc", "v0", "v1", "v2", "v3");
}

View File

@ -64,7 +64,7 @@ __declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
mov eax, ebp
movdqa xmm7, xmm6
palignr xmm7, xmm7, 8
// Second round of bit swap.
// Second round of bit swap.
punpcklwd xmm0, xmm2
punpcklwd xmm1, xmm3
movdqa xmm2, xmm0
@ -77,8 +77,8 @@ __declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
movdqa xmm7, xmm5
palignr xmm6, xmm6, 8
palignr xmm7, xmm7, 8
// Third round of bit swap.
// Write to the destination pointer.
// Third round of bit swap.
// Write to the destination pointer.
punpckldq xmm0, xmm4
movq qword ptr [edx], xmm0
movdqa xmm4, xmm0
@ -173,7 +173,7 @@ __declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src,
movdqa xmm7, xmm5
lea eax, [eax + 8 * edi + 16]
neg edi
// Second round of bit swap.
// Second round of bit swap.
movdqa xmm5, xmm0
punpcklwd xmm0, xmm2
punpckhwd xmm5, xmm2
@ -193,8 +193,8 @@ __declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src,
punpckhwd xmm6, xmm7
movdqa xmm7, xmm6
// Third round of bit swap.
// Write to the destination pointer.
// Third round of bit swap.
// Write to the destination pointer.
movdqa xmm6, xmm0
punpckldq xmm0, xmm4
punpckhdq xmm6, xmm4

View File

@ -10,6 +10,7 @@
#include "libyuv/row.h"
#include <stddef.h>
#include <string.h> // For memset.
#include "libyuv/basic_types.h"
@ -387,12 +388,6 @@ ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
#ifdef HAS_I422TORGB24ROW_AVX512VBMI
ANY31C(I422ToRGB24Row_Any_AVX512VBMI, I422ToRGB24Row_AVX512VBMI, 1, 0, 3, 31)
#endif
#ifdef HAS_I422TORGB24ROW_AVX512BW
ANY31C(I422ToRGB24Row_Any_AVX512BW, I422ToRGB24Row_AVX512BW, 1, 0, 3, 31)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
@ -951,7 +946,9 @@ ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
@ -987,9 +984,8 @@ ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
#if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX512BW)
ANY11(J400ToARGBRow_Any_AVX512BW, J400ToARGBRow_AVX512BW, 0, 1, 4, 31)
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
@ -997,14 +993,13 @@ ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif
#if defined(HAS_RAWTOARGBROW_AVX2)
ANY11(RAWToARGBRow_Any_AVX2, RAWToARGBRow_AVX2, 0, 3, 4, 31)
#endif
#if defined(HAS_RGB24TOARGBROW_AVX2)
ANY11(RGB24ToARGBRow_Any_AVX2, RGB24ToARGBRow_AVX2, 0, 3, 4, 31)
#endif
#if defined(HAS_RAWTOARGBROW_AVX512BW)
ANY11(RAWToARGBRow_Any_AVX512BW, RAWToARGBRow_AVX512BW, 0, 3, 4, 63)
#endif
@ -1420,8 +1415,8 @@ ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
// Any 1 to 1 with parameter.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]); \
SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]); \
SIMD_ALIGNED(uint8_t vin[64]); \
SIMD_ALIGNED(uint8_t vout[64]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@ -1467,6 +1462,14 @@ ANY11P(I400ToARGBRow_Any_LSX,
15)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2,
ARGBToRGB565DitherRow_SSE2,
const uint32_t,
4,
2,
3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2,
ARGBToRGB565DitherRow_AVX2,
@ -1505,14 +1508,6 @@ ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
ANY11P(ARGBShuffleRow_Any_AVX512BW,
ARGBShuffleRow_AVX512BW,
const uint8_t*,
4,
4,
31)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
@ -1835,9 +1830,18 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
memcpy(dst_ptr + np * BPP, vout, r * BPP * sizeof(TD)); \
}
#if defined(HAS_INTERPOLATEROW_AVX2)
#ifdef HAS_INTERPOLATEROW_AVX2
ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
#endif
#ifdef HAS_INTERPOLATEROW_SSSE3
ANY11I(InterpolateRow_Any_SSSE3,
InterpolateRow_SSSE3,
uint8_t,
uint8_t,
1,
1,
15)
#endif
#ifdef HAS_INTERPOLATEROW_NEON
ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15)
#endif
@ -1854,15 +1858,6 @@ ANY11I(InterpolateRow_16_Any_NEON,
1,
7)
#endif
#ifdef HAS_INTERPOLATEROW_16_AVX2
ANY11I(InterpolateRow_16_Any_AVX2,
InterpolateRow_16_AVX2,
uint16_t,
uint16_t,
1,
1,
15)
#endif
#undef ANY11I
// Any 1 to 1 interpolate with scale param
@ -1911,8 +1906,8 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
// Any 1 to 1 mirror.
#define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
SIMD_ALIGNED(uint8_t vin[128]); \
SIMD_ALIGNED(uint8_t vout[128]); \
SIMD_ALIGNED(uint8_t vin[64]); \
SIMD_ALIGNED(uint8_t vout[64]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@ -1920,14 +1915,11 @@ ANY11IS(InterpolateRow_16To8_Any_AVX2,
ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \
} \
ptrdiff_t np = n; \
memcpy(vin, src_ptr, r * BPP); \
memcpy(vin, src_ptr, r* BPP); \
ANY_SIMD(vin, vout, MASK + 1); \
memcpy(dst_ptr + np * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \
}
#ifdef HAS_MIRRORROW_AVX512BW
ANY11M(MirrorRow_Any_AVX512BW, MirrorRow_AVX512BW, 1, 63)
#endif
#ifdef HAS_MIRRORROW_AVX2
ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
#endif
@ -1946,6 +1938,9 @@ ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63)
#ifdef HAS_MIRRORUVROW_AVX2
ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15)
#endif
#ifdef HAS_MIRRORUVROW_SSSE3
ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7)
#endif
#ifdef HAS_MIRRORUVROW_NEON
ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31)
#endif
@ -1970,8 +1965,8 @@ ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7)
#ifdef HAS_ARGBMIRRORROW_LASX
ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15)
#endif
#ifdef HAS_RGB24MIRRORROW_AVX2
ANY11M(RGB24MirrorRow_Any_AVX2, RGB24MirrorRow_AVX2, 3, 31)
#ifdef HAS_RGB24MIRRORROW_SSSE3
ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15)
#endif
#ifdef HAS_RGB24MIRRORROW_NEON
ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15)
@ -2031,9 +2026,6 @@ ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3)
#ifdef HAS_SPLITUVROW_SSE2
ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15)
#endif
#ifdef HAS_SPLITUVROW_AVX512BW
ANY12(SplitUVRow_Any_AVX512BW, SplitUVRow_AVX512BW, 0, 2, 0, 63)
#endif
#ifdef HAS_SPLITUVROW_AVX2
ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31)
#endif
@ -2205,7 +2197,7 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
uint8_t* dst_v, int width) { \
SIMD_ALIGNED(uint8_t vin[256 * 2]); \
SIMD_ALIGNED(uint8_t vout[256 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
memset(vin, 0, sizeof(vin)); /* for msan */ \
memset(vout, 0, sizeof(vout)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@ -2227,29 +2219,29 @@ ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15)
memcpy(dst_v + (np >> 1), vout + 256, SS(r, 1)); \
}
#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
int width, const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[256]); \
SIMD_ALIGNED(uint8_t vout[256 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \
} \
memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \
ANY_SIMD(vin, vout, vout + 256, MASK + 1, c); \
memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \
memcpy(dst_v + (ptrdiff_t)n, vout + 256, (ptrdiff_t)r); \
#define ANY12M(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \
int width, const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[256]); \
SIMD_ALIGNED(uint8_t vout[256 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_u, dst_v, n, c); \
} \
memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \
ANY_SIMD(vin, vout, vout + 256, MASK + 1, c); \
memcpy(dst_u + (ptrdiff_t)n, vout, (ptrdiff_t)r); \
memcpy(dst_v + (ptrdiff_t)n, vout + 256, (ptrdiff_t)r); \
}
#define ANY12MS(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width, const struct ArgbConstants* c) { \
void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \
uint8_t* dst_v, int width, const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[256 * 2]); \
SIMD_ALIGNED(uint8_t vout[256 * 2]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
memset(vin, 0, sizeof(vin)); /* for msan */ \
memset(vout, 0, sizeof(vout)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
@ -2277,35 +2269,12 @@ ANY12MS(ARGBToUVMatrixRow_Any_NEON, ARGBToUVMatrixRow_NEON, 0, 4, 15)
#ifdef HAS_ARGBTOUVMATRIXROW_NEON_I8MM
ANY12MS(ARGBToUVMatrixRow_Any_NEON_I8MM, ARGBToUVMatrixRow_NEON_I8MM, 0, 4, 15)
#endif
#ifdef HAS_RGBTOUVMATRIXROW_NEON
ANY12MS(RGBToUVMatrixRow_Any_NEON, RGBToUVMatrixRow_NEON, 0, 3, 15)
#endif
#ifdef HAS_RGB565TOUVMATRIXROW_NEON
ANY12MS(RGB565ToUVMatrixRow_Any_NEON, RGB565ToUVMatrixRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB1555TOUVMATRIXROW_NEON
ANY12MS(ARGB1555ToUVMatrixRow_Any_NEON, ARGB1555ToUVMatrixRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGB4444TOUVMATRIXROW_NEON
ANY12MS(ARGB4444ToUVMatrixRow_Any_NEON, ARGB4444ToUVMatrixRow_NEON, 0, 2, 15)
#endif
#ifdef HAS_ARGBTOUVMATRIXROW_AVX2
ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 31)
ANY12MS(RGBToUVMatrixRow_Any_AVX2, RGBToUVMatrixRow_AVX2, 0, 3, 31)
ANY12MS(RGB565ToUVMatrixRow_Any_AVX2, RGB565ToUVMatrixRow_AVX2, 0, 2, 31)
#ifdef HAS_ARGB1555TOARGBROW_AVX2
ANY12MS(ARGB1555ToUVMatrixRow_Any_AVX2, ARGB1555ToUVMatrixRow_AVX2, 0, 2, 31)
#endif
#ifdef HAS_ARGB4444TOARGBROW_AVX2
ANY12MS(ARGB4444ToUVMatrixRow_Any_AVX2, ARGB4444ToUVMatrixRow_AVX2, 0, 2, 31)
#endif
ANY12MS(ARGBToUVMatrixRow_Any_AVX2, ARGBToUVMatrixRow_AVX2, 0, 4, 15)
#endif
#ifdef HAS_ARGBTOUVMATRIXROW_AVX512BW
ANY12MS(ARGBToUVMatrixRow_Any_AVX512BW, ARGBToUVMatrixRow_AVX512BW, 0, 4, 63)
#endif
#ifdef HAS_RGBTOUVMATRIXROW_AVX512BW
ANY12MS(RGBToUVMatrixRow_Any_AVX512BW, RGBToUVMatrixRow_AVX512BW, 0, 3, 63)
#endif
#ifdef HAS_ARGBTOUVMATRIXROW_SSSE3
ANY12MS(ARGBToUVMatrixRow_Any_SSSE3, ARGBToUVMatrixRow_SSSE3, 0, 4, 7)
#endif
@ -2322,20 +2291,20 @@ ANY12M(ARGBToUV444MatrixRow_Any_SSSE3, ARGBToUV444MatrixRow_SSSE3, 4, 15)
ANY12M(ARGBToUV444MatrixRow_Any_NEON, ARGBToUV444MatrixRow_NEON, 4, 7)
#endif
#define ANY11MC(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width, \
const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[256]); \
SIMD_ALIGNED(uint8_t vout[256]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n, c); \
} \
memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \
ANY_SIMD(vin, vout, MASK + 1, c); \
memcpy(dst_ptr + (ptrdiff_t)n, vout, (ptrdiff_t)r); \
#define ANY11MC(NAMEANY, ANY_SIMD, BPP, MASK) \
void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width, \
const struct ArgbConstants* c) { \
SIMD_ALIGNED(uint8_t vin[256]); \
SIMD_ALIGNED(uint8_t vout[256]); \
memset(vin, 0, sizeof(vin)); /* for msan */ \
int r = width & MASK; \
int n = width & ~MASK; \
if (n > 0) { \
ANY_SIMD(src_ptr, dst_ptr, n, c); \
} \
memcpy(vin, src_ptr + (ptrdiff_t)n * BPP, (ptrdiff_t)r * BPP); \
ANY_SIMD(vin, vout, MASK + 1, c); \
memcpy(dst_ptr + (ptrdiff_t)n, vout, (ptrdiff_t)r); \
}
#ifdef HAS_ARGBTOYROW_SSSE3
@ -2343,14 +2312,6 @@ ANY11MC(ARGBToYMatrixRow_Any_SSSE3, ARGBToYMatrixRow_SSSE3, 4, 15)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11MC(ARGBToYMatrixRow_Any_AVX2, ARGBToYMatrixRow_AVX2, 4, 31)
ANY11MC(RGBToYMatrixRow_Any_AVX2, RGBToYMatrixRow_AVX2, 3, 31)
ANY11MC(RGB565ToYMatrixRow_Any_AVX2, RGB565ToYMatrixRow_AVX2, 2, 31)
#ifdef HAS_ARGB1555TOYMATRIXROW_AVX2
ANY11MC(ARGB1555ToYMatrixRow_Any_AVX2, ARGB1555ToYMatrixRow_AVX2, 2, 31)
#endif
#ifdef HAS_ARGB4444TOYMATRIXROW_AVX2
ANY11MC(ARGB4444ToYMatrixRow_Any_AVX2, ARGB4444ToYMatrixRow_AVX2, 2, 31)
#endif
#endif
#ifdef HAS_ARGBTOYROW_AVX512BW
ANY11MC(ARGBToYMatrixRow_Any_AVX512BW, ARGBToYMatrixRow_AVX512BW, 4, 63)
@ -2361,18 +2322,6 @@ ANY11MC(ARGBToYMatrixRow_Any_NEON, ARGBToYMatrixRow_NEON, 4, 15)
#ifdef HAS_ARGBTOYMATRIXROW_NEON_DOTPROD
ANY11MC(ARGBToYMatrixRow_Any_NEON_DotProd, ARGBToYMatrixRow_NEON_DotProd, 4, 15)
#endif
#ifdef HAS_RGBTOYMATRIXROW_NEON
ANY11MC(RGBToYMatrixRow_Any_NEON, RGBToYMatrixRow_NEON, 3, 15)
#endif
#ifdef HAS_RGB565TOYMATRIXROW_NEON
ANY11MC(RGB565ToYMatrixRow_Any_NEON, RGB565ToYMatrixRow_NEON, 2, 15)
#endif
#ifdef HAS_ARGB1555TOYMATRIXROW_NEON
ANY11MC(ARGB1555ToYMatrixRow_Any_NEON, ARGB1555ToYMatrixRow_NEON, 2, 15)
#endif
#ifdef HAS_ARGB4444TOYMATRIXROW_NEON
ANY11MC(ARGB4444ToYMatrixRow_Any_NEON, ARGB4444ToYMatrixRow_NEON, 2, 15)
#endif
#ifdef HAS_ARGBTOYMATRIXROW_LSX
ANY11MC(ARGBToYMatrixRow_Any_LSX, ARGBToYMatrixRow_LSX, 4, 15)
#endif

View File

@ -14,7 +14,7 @@
#include <string.h> // For memcpy and memset.
#include "libyuv/basic_types.h"
#include "libyuv/convert_argb.h" // For kYuvI601Constants
#include "libyuv/convert_argb.h" // For kYuvI601Constants
#include "libyuv/convert_from_argb.h" // For ArgbConstants
#ifdef __cplusplus
@ -37,6 +37,10 @@ extern "C" {
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif
// llvm x86 is poor at ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
@ -749,31 +753,28 @@ MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
#undef MAKEROWYJ
static __inline uint8_t RGBToYMatrix(uint8_t b0,
uint8_t b1,
uint8_t b2,
uint8_t b3,
static __inline uint8_t RGBToYMatrix(uint8_t r,
uint8_t g,
uint8_t b,
const struct ArgbConstants* c) {
return (c->kRGBToY[0] * b0 + c->kRGBToY[1] * b1 + c->kRGBToY[2] * b2 +
c->kRGBToY[3] * b3 + c->kAddY[0]) >>
return (c->kRGBToY[2] * r + c->kRGBToY[1] * g + c->kRGBToY[0] * b +
c->kAddY[0]) >>
8;
}
static __inline uint8_t RGBToUMatrix(uint8_t b0,
uint8_t b1,
uint8_t b2,
uint8_t b3,
static __inline uint8_t RGBToUMatrix(uint8_t r,
uint8_t g,
uint8_t b,
const struct ArgbConstants* c) {
return (c->kAddUV[0] - (c->kRGBToU[0] * b0 + c->kRGBToU[1] * b1 +
c->kRGBToU[2] * b2 + c->kRGBToU[3] * b3)) >>
return (c->kAddUV[0] -
(c->kRGBToU[2] * r + c->kRGBToU[1] * g + c->kRGBToU[0] * b)) >>
8;
}
static __inline uint8_t RGBToVMatrix(uint8_t b0,
uint8_t b1,
uint8_t b2,
uint8_t b3,
static __inline uint8_t RGBToVMatrix(uint8_t r,
uint8_t g,
uint8_t b,
const struct ArgbConstants* c) {
return (c->kAddUV[0] - (c->kRGBToV[0] * b0 + c->kRGBToV[1] * b1 +
c->kRGBToV[2] * b2 + c->kRGBToV[3] * b3)) >>
return (c->kAddUV[0] -
(c->kRGBToV[2] * r + c->kRGBToV[1] * g + c->kRGBToV[0] * b)) >>
8;
}
@ -783,8 +784,7 @@ void ARGBToYMatrixRow_C(const uint8_t* src_argb,
const struct ArgbConstants* c) {
int x;
for (x = 0; x < width; ++x) {
dst_y[0] =
RGBToYMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c);
dst_y[0] = RGBToYMatrix(src_argb[2], src_argb[1], src_argb[0], c);
src_argb += 4;
dst_y += 1;
}
@ -799,28 +799,25 @@ void ARGBToUVMatrixRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1 = src_argb + src_stride_argb;
int x;
for (x = 0; x < width - 1; x += 2) {
uint8_t b0 =
uint8_t ab =
(src_argb[0] + src_argb[4] + src_argb1[0] + src_argb1[4] + 2) >> 2;
uint8_t b1 =
uint8_t ag =
(src_argb[1] + src_argb[5] + src_argb1[1] + src_argb1[5] + 2) >> 2;
uint8_t b2 =
uint8_t ar =
(src_argb[2] + src_argb[6] + src_argb1[2] + src_argb1[6] + 2) >> 2;
uint8_t b3 =
(src_argb[3] + src_argb[7] + src_argb1[3] + src_argb1[7] + 2) >> 2;
dst_u[0] = RGBToUMatrix(b0, b1, b2, b3, c);
dst_v[0] = RGBToVMatrix(b0, b1, b2, b3, c);
dst_u[0] = RGBToUMatrix(ar, ag, ab, c);
dst_v[0] = RGBToVMatrix(ar, ag, ab, c);
src_argb += 8;
src_argb1 += 8;
dst_u += 1;
dst_v += 1;
}
if (width & 1) {
uint8_t b0 = (src_argb[0] + src_argb1[0] + 1) >> 1;
uint8_t b1 = (src_argb[1] + src_argb1[1] + 1) >> 1;
uint8_t b2 = (src_argb[2] + src_argb1[2] + 1) >> 1;
uint8_t b3 = (src_argb[3] + src_argb1[3] + 1) >> 1;
dst_u[0] = RGBToUMatrix(b0, b1, b2, b3, c);
dst_v[0] = RGBToVMatrix(b0, b1, b2, b3, c);
uint8_t ab = (src_argb[0] + src_argb1[0] + 1) >> 1;
uint8_t ag = (src_argb[1] + src_argb1[1] + 1) >> 1;
uint8_t ar = (src_argb[2] + src_argb1[2] + 1) >> 1;
dst_u[0] = RGBToUMatrix(ar, ag, ab, c);
dst_v[0] = RGBToVMatrix(ar, ag, ab, c);
}
}
@ -831,10 +828,11 @@ void ARGBToUV444MatrixRow_C(const uint8_t* src_argb,
const struct ArgbConstants* c) {
int x;
for (x = 0; x < width; ++x) {
dst_u[0] =
RGBToUMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c);
dst_v[0] =
RGBToVMatrix(src_argb[0], src_argb[1], src_argb[2], src_argb[3], c);
uint8_t ab = src_argb[0];
uint8_t ag = src_argb[1];
uint8_t ar = src_argb[2];
dst_u[0] = RGBToUMatrix(ar, ag, ab, c);
dst_v[0] = RGBToVMatrix(ar, ag, ab, c);
src_argb += 4;
dst_u += 1;
dst_v += 1;
@ -1514,18 +1512,18 @@ void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
extern const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \
-(RV), 0, AY, AUV); \
extern const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \
-(BV), 0, AY, AUV); \
extern const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \
ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \
-(GV), -(RV), AY, AUV); \
extern const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \
ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \
#define MAKEARGBCONSTANTS(name, RY, GY, BY, RU, GU, BU, RV, GV, BV, AY, AUV) \
const struct ArgbConstants SIMD_ALIGNED(kArgb##name##Constants) = \
ARGBCONSTANTSBODY(BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), -(GV), \
-(RV), 0, AY, AUV); \
const struct ArgbConstants SIMD_ALIGNED(kAbgr##name##Constants) = \
ARGBCONSTANTSBODY(RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), -(GV), \
-(BV), 0, AY, AUV); \
const struct ArgbConstants SIMD_ALIGNED(kRgba##name##Constants) = \
ARGBCONSTANTSBODY(0, BY, GY, RY, 0, -(BU), -(GU), -(RU), 0, -(BV), \
-(GV), -(RV), AY, AUV); \
const struct ArgbConstants SIMD_ALIGNED(kBgra##name##Constants) = \
ARGBCONSTANTSBODY(0, RY, GY, BY, 0, -(RU), -(GU), -(BU), 0, -(RV), \
-(GV), -(BV), AY, AUV);
// BT.601 limited range RGB to YUV coefficients
@ -3468,7 +3466,7 @@ void ARGBBlendRow_C(const uint8_t* src_argb,
}
#undef BLEND
#define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8
#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
void BlendPlaneRow_C(const uint8_t* src0,
const uint8_t* src1,
const uint8_t* alpha,
@ -3575,8 +3573,12 @@ const uint32_t fixed_invtbl8[256] = {
T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
#if defined(LIBYUV_UNATTENUATE_DUP)
// This code mimics the Intel SIMD version for better testability.
#define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
#else
#define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
#endif
// mimics the Intel SIMD code for exactness.
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
@ -3664,8 +3666,7 @@ void ARGBAffineRow_C(const uint8_t* src_argb,
int x = (int)(uv[0]);
int y = (int)(uv[1]);
*(uint32_t*)(dst_argb) =
*(const uint32_t*)(src_argb + (ptrdiff_t)y * src_argb_stride +
(ptrdiff_t)x * 4);
*(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
dst_argb += 4;
uv[0] += uv_dudv[2];
uv[1] += uv_dudv[3];
@ -4171,7 +4172,7 @@ void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
}
#endif
#if defined(HAS_NV12TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
#if defined(HAS_NV12TORGB24ROW_AVX2)
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb24,
@ -4182,7 +4183,11 @@ void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
src_y += twidth;
src_uv += twidth;
dst_rgb24 += twidth * 3;
@ -4191,7 +4196,7 @@ void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_NV21TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
#if defined(HAS_NV21TORGB24ROW_AVX2)
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_vu,
uint8_t* dst_rgb24,
@ -4202,7 +4207,11 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
src_y += twidth;
src_vu += twidth;
dst_rgb24 += twidth * 3;
@ -4211,7 +4220,7 @@ void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTORGB565ROW_AVX2)
#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -4222,7 +4231,11 @@ void I422ToRGB565Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
src_y += twidth;
src_u += twidth / 2;
src_v += twidth / 2;
@ -4232,7 +4245,7 @@ void I422ToRGB565Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTOARGB1555ROW_AVX2)
#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -4244,7 +4257,11 @@ void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
src_y += twidth;
src_u += twidth / 2;
src_v += twidth / 2;
@ -4254,7 +4271,7 @@ void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTOARGB4444ROW_AVX2)
#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -4266,7 +4283,11 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
src_y += twidth;
src_u += twidth / 2;
src_v += twidth / 2;
@ -4276,7 +4297,7 @@ void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -4288,7 +4309,11 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
src_y += twidth;
src_u += twidth / 2;
src_v += twidth / 2;
@ -4298,51 +4323,7 @@ void I422ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
// Two-pass I422 to RGB24 conversion: each chunk of at most MAXTWIDTH pixels
// is expanded to ARGB by I422ToARGBRow_AVX512BW and then packed to 3 bytes
// per pixel by ARGBToRGB24Row_AVX512VBMI.
void I422ToRGB24Row_AVX512VBMI(const uint8_t* src_y,
                               const uint8_t* src_u,
                               const uint8_t* src_v,
                               uint8_t* dst_rgb24,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  // Scratch ARGB pixels for one chunk (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToRGB24Row_AVX512VBMI(row, dst_rgb24, chunk);
    // U and V advance at half the rate of Y.
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;
  }
}
#endif
#if defined(HAS_I422TOARGBROW_AVX512BW) && defined(HAS_ARGBTORGB24ROW_AVX2)
// Two-pass I422 to RGB24 conversion: each chunk of at most MAXTWIDTH pixels
// is expanded to ARGB by I422ToARGBRow_AVX512BW and then packed to 3 bytes
// per pixel by ARGBToRGB24Row_AVX2.
void I422ToRGB24Row_AVX512BW(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_rgb24,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Scratch ARGB pixels for one chunk (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX512BW(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToRGB24Row_AVX2(row, dst_rgb24, chunk);
    // U and V advance at half the rate of Y.
    src_y += chunk;
    src_u += chunk / 2;
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;
  }
}
#endif
#if defined(HAS_I444TOARGBROW_AVX2) && defined(HAS_ARGBTORGB24ROW_AVX2)
#if defined(HAS_I444TORGB24ROW_AVX2)
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@ -4354,7 +4335,11 @@ void I444ToRGB24Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
src_y += twidth;
src_u += twidth;
src_v += twidth;
@ -4364,7 +4349,7 @@ void I444ToRGB24Row_AVX2(const uint8_t* src_y,
}
#endif
#if defined(HAS_NV12TOARGBROW_AVX2) && defined(HAS_ARGBTORGB565ROW_AVX2)
#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
const uint8_t* src_uv,
uint8_t* dst_rgb565,
@ -4375,7 +4360,11 @@ void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
src_y += twidth;
src_uv += twidth;
dst_rgb565 += twidth * 2;
@ -4384,6 +4373,26 @@ void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
}
#endif
#ifdef HAS_RGB24TOYJROW_AVX2
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
}
#endif // HAS_RGB24TOYJROW_AVX2
#ifdef HAS_RAWTOYJROW_AVX2
// Convert 32 RAW pixels (128 bytes) to 32 YJ values.
}
#endif // HAS_RAWTOYJROW_AVX2
#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
}
#endif // HAS_RGB24TOYJROW_SSSE3
#ifdef HAS_RAWTOYJROW_SSSE3
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
}
#endif // HAS_RAWTOYJROW_SSSE3
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
const uint16_t* src_ptr,
@ -4395,7 +4404,7 @@ void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
InterpolateRow_16_AVX2(row, src_ptr, src_stride, twidth, source_y_fraction);
InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
src_ptr += twidth;
dst_ptr += twidth;
@ -4601,465 +4610,6 @@ void HalfMergeUVRow_C(const uint8_t* src_u,
#undef STATIC_CAST
// Produces one Y byte per 3-byte source pixel. Each chunk of at most
// MAXTWIDTH pixels is expanded to ARGB with RGB24ToARGBRow_C and then fed
// to ARGBToYMatrixRow_C with the caller's constants.
void RGBToYMatrixRow_C(const uint8_t* src_rgb,
                       uint8_t* dst_y,
                       int width,
                       const struct ArgbConstants* c) {
  // Scratch ARGB pixels for one chunk (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB24ToARGBRow_C(src_rgb, row, chunk);
    ARGBToYMatrixRow_C(row, dst_y, chunk, c);
    src_rgb += chunk * 3;
    dst_y += chunk;
  }
}
void RGBToUVMatrixRow_C(const uint8_t* src_rgb,
int src_stride_rgb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
RGB24ToARGBRow_C(src_rgb, row, twidth);
RGB24ToARGBRow_C(src_rgb + src_stride_rgb, row + MAXTWIDTH * 4, twidth);
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
src_rgb += twidth * 3;
dst_u += twidth / 2;
dst_v += twidth / 2;
width -= twidth;
}
}
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2)
// Produces one Y byte per 3-byte source pixel. Each chunk of at most
// MAXTWIDTH pixels is expanded to ARGB with RGB24ToARGBRow_AVX2 and then fed
// to ARGBToYMatrixRow_AVX2 with the caller's constants.
void RGBToYMatrixRow_AVX2(const uint8_t* src_rgb,
                          uint8_t* dst_y,
                          int width,
                          const struct ArgbConstants* c) {
  // Scratch ARGB pixels for one chunk (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB24ToARGBRow_AVX2(src_rgb, row, chunk);
    ARGBToYMatrixRow_AVX2(row, dst_y, chunk, c);
    src_rgb += chunk * 3;
    dst_y += chunk;
  }
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB24TOARGBROW_AVX2)
// Produces U and V from two source rows of 3-byte pixels (the second row is
// read at src_rgb + src_stride_rgb). Both rows are expanded to ARGB with
// RGB24ToARGBRow_AVX2 into a scratch buffer, which is then handed to
// ARGBToUVMatrixRow_AVX2. dst_u and dst_v advance by half the chunk width.
void RGBToUVMatrixRow_AVX2(const uint8_t* src_rgb,
                           int src_stride_rgb,
                           uint8_t* dst_u,
                           uint8_t* dst_v,
                           int width,
                           const struct ArgbConstants* c) {
  // Two ARGB rows stored back to back, MAXTWIDTH * 4 bytes apart.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
  uint8_t* const row1 = row + MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB24ToARGBRow_AVX2(src_rgb, row, chunk);
    RGB24ToARGBRow_AVX2(src_rgb + src_stride_rgb, row1, chunk);
    ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, chunk, c);
    src_rgb += chunk * 3;
    dst_u += chunk / 2;
    dst_v += chunk / 2;
  }
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX512BW) && \
defined(HAS_RGB24TOARGBROW_AVX512BW)
// Produces U and V from two source rows of 3-byte pixels (the second row is
// read at src_rgb + src_stride_rgb). Both rows are expanded to ARGB with
// RGB24ToARGBRow_AVX512BW into a scratch buffer, which is then handed to
// ARGBToUVMatrixRow_AVX512BW. dst_u and dst_v advance by half the chunk
// width.
void RGBToUVMatrixRow_AVX512BW(const uint8_t* src_rgb,
                               int src_stride_rgb,
                               uint8_t* dst_u,
                               uint8_t* dst_v,
                               int width,
                               const struct ArgbConstants* c) {
  // Two ARGB rows stored back to back, MAXTWIDTH * 4 bytes apart.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
  uint8_t* const row1 = row + MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB24ToARGBRow_AVX512BW(src_rgb, row, chunk);
    RGB24ToARGBRow_AVX512BW(src_rgb + src_stride_rgb, row1, chunk);
    ARGBToUVMatrixRow_AVX512BW(row, MAXTWIDTH * 4, dst_u, dst_v, chunk, c);
    src_rgb += chunk * 3;
    dst_u += chunk / 2;
    dst_v += chunk / 2;
  }
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_RGB24TOARGBROW_NEON)
// Produces U and V from two source rows of 3-byte pixels (the second row is
// read at src_rgb + src_stride_rgb). Both rows are expanded to ARGB with
// RGB24ToARGBRow_NEON into a scratch buffer, which is then handed to
// ARGBToUVMatrixRow_NEON. dst_u and dst_v advance by half the chunk width.
void RGBToUVMatrixRow_NEON(const uint8_t* src_rgb,
                           int src_stride_rgb,
                           uint8_t* dst_u,
                           uint8_t* dst_v,
                           int width,
                           const struct ArgbConstants* c) {
  // Two ARGB rows stored back to back, MAXTWIDTH * 4 bytes apart.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
  uint8_t* const row1 = row + MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB24ToARGBRow_NEON(src_rgb, row, chunk);
    RGB24ToARGBRow_NEON(src_rgb + src_stride_rgb, row1, chunk);
    ARGBToUVMatrixRow_NEON(row, MAXTWIDTH * 4, dst_u, dst_v, chunk, c);
    src_rgb += chunk * 3;
    dst_u += chunk / 2;
    dst_v += chunk / 2;
  }
}
#endif
// Produces one Y byte per 2-byte source pixel. Each chunk of at most
// MAXTWIDTH pixels is expanded to ARGB with RGB565ToARGBRow_C and then fed
// to ARGBToYMatrixRow_C with the caller's constants.
void RGB565ToYMatrixRow_C(const uint8_t* src_rgb565,
                          uint8_t* dst_y,
                          int width,
                          const struct ArgbConstants* c) {
  // Scratch ARGB pixels for one chunk (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB565ToARGBRow_C(src_rgb565, row, chunk);
    ARGBToYMatrixRow_C(row, dst_y, chunk, c);
    src_rgb565 += chunk * 2;
    dst_y += chunk;
  }
}
void RGB565ToUVMatrixRow_C(const uint8_t* src_rgb565,
int src_stride_rgb565,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
RGB565ToARGBRow_C(src_rgb565, row, twidth);
RGB565ToARGBRow_C(src_rgb565 + src_stride_rgb565, row + MAXTWIDTH * 4,
twidth);
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
src_rgb565 += twidth * 2;
dst_u += twidth / 2;
dst_v += twidth / 2;
width -= twidth;
}
}
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2)
// Produces one Y byte per 2-byte source pixel. Each chunk of at most
// MAXTWIDTH pixels is expanded to ARGB with RGB565ToARGBRow_AVX2 and then
// fed to ARGBToYMatrixRow_AVX2 with the caller's constants.
void RGB565ToYMatrixRow_AVX2(const uint8_t* src_rgb565,
                             uint8_t* dst_y,
                             int width,
                             const struct ArgbConstants* c) {
  // Scratch ARGB pixels for one chunk (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB565ToARGBRow_AVX2(src_rgb565, row, chunk);
    ARGBToYMatrixRow_AVX2(row, dst_y, chunk, c);
    src_rgb565 += chunk * 2;
    dst_y += chunk;
  }
}
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2) && defined(HAS_RGB565TOARGBROW_AVX2)
// Produces U and V from two source rows of 2-byte pixels (the second row is
// read at src_rgb565 + src_stride_rgb565). Both rows are expanded to ARGB
// with RGB565ToARGBRow_AVX2 into a scratch buffer, which is then handed to
// ARGBToUVMatrixRow_AVX2. dst_u and dst_v advance by half the chunk width.
void RGB565ToUVMatrixRow_AVX2(const uint8_t* src_rgb565,
                              int src_stride_rgb565,
                              uint8_t* dst_u,
                              uint8_t* dst_v,
                              int width,
                              const struct ArgbConstants* c) {
  // Two ARGB rows stored back to back, MAXTWIDTH * 4 bytes apart.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
  uint8_t* const row1 = row + MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    RGB565ToARGBRow_AVX2(src_rgb565, row, chunk);
    RGB565ToARGBRow_AVX2(src_rgb565 + src_stride_rgb565, row1, chunk);
    ARGBToUVMatrixRow_AVX2(row, MAXTWIDTH * 4, dst_u, dst_v, chunk, c);
    src_rgb565 += chunk * 2;
    dst_u += chunk / 2;
    dst_v += chunk / 2;
  }
}
#endif
#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOYMATRIXROW_NEON)
// NEON variant of the RGB565 to Y conversion with constants |c|.
// Each chunk of at most MAXTWIDTH pixels is expanded to ARGB in a stack
// buffer with RGB565ToARGBRow_NEON and then passed to ARGBToYMatrixRow_NEON.
void RGB565ToYMatrixRow_NEON(const uint8_t* src_rgb565,
                             uint8_t* dst_y,
                             int width,
                             const struct ArgbConstants* c) {
  // Scratch buffer for one chunk of ARGB (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    RGB565ToARGBRow_NEON(src_rgb565, argb_tmp, chunk);
    ARGBToYMatrixRow_NEON(argb_tmp, dst_y, chunk, c);
    src_rgb565 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_y += chunk;           // One Y byte per pixel.
  }
}
#endif  // HAS_RGB565TOARGBROW_NEON && HAS_ARGBTOYMATRIXROW_NEON
#if defined(HAS_RGB565TOARGBROW_NEON) && defined(HAS_ARGBTOUVMATRIXROW_NEON)
// NEON variant of the RGB565 to U/V conversion with constants |c|.
// Reads the row at |src_rgb565| and the row |src_stride_rgb565| bytes after
// it. Each chunk of at most MAXTWIDTH pixels of both rows is expanded to ARGB
// in a stack buffer and then passed to ARGBToUVMatrixRow_NEON.
void RGB565ToUVMatrixRow_NEON(const uint8_t* src_rgb565,
                              int src_stride_rgb565,
                              uint8_t* dst_u,
                              uint8_t* dst_v,
                              int width,
                              const struct ArgbConstants* c) {
  // Scratch buffer for two ARGB rows stored back to back.
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4 * 2]);
  // Byte offset of the second ARGB row inside the scratch buffer.
  const int tmp_stride = MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    RGB565ToARGBRow_NEON(src_rgb565, argb_tmp, chunk);
    RGB565ToARGBRow_NEON(src_rgb565 + src_stride_rgb565,
                         argb_tmp + tmp_stride, chunk);
    ARGBToUVMatrixRow_NEON(argb_tmp, tmp_stride, dst_u, dst_v, chunk, c);
    src_rgb565 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_u += chunk / 2;       // U and V advance by half the chunk width.
    dst_v += chunk / 2;
  }
}
#endif  // HAS_RGB565TOARGBROW_NEON && HAS_ARGBTOUVMATRIXROW_NEON
void ARGB1555ToYMatrixRow_C(const uint8_t* src_argb1555,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
ARGB1555ToARGBRow_C(src_argb1555, row, twidth);
ARGBToYMatrixRow_C(row, dst_y, twidth, c);
src_argb1555 += twidth * 2;
dst_y += twidth;
width -= twidth;
}
}
void ARGB1555ToUVMatrixRow_C(const uint8_t* src_argb1555,
int src_stride_argb1555,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
ARGB1555ToARGBRow_C(src_argb1555, row, twidth);
ARGB1555ToARGBRow_C(src_argb1555 + src_stride_argb1555, row + MAXTWIDTH * 4,
twidth);
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
src_argb1555 += twidth * 2;
dst_u += twidth / 2;
dst_v += twidth / 2;
width -= twidth;
}
}
void ARGB4444ToYMatrixRow_C(const uint8_t* src_argb4444,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
ARGB4444ToARGBRow_C(src_argb4444, row, twidth);
ARGBToYMatrixRow_C(row, dst_y, twidth, c);
src_argb4444 += twidth * 2;
dst_y += twidth;
width -= twidth;
}
}
void ARGB4444ToUVMatrixRow_C(const uint8_t* src_argb4444,
int src_stride_argb4444,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4 * 2]);
while (width > 0) {
int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
ARGB4444ToARGBRow_C(src_argb4444, row, twidth);
ARGB4444ToARGBRow_C(src_argb4444 + src_stride_argb4444, row + MAXTWIDTH * 4,
twidth);
ARGBToUVMatrixRow_C(row, MAXTWIDTH * 4, dst_u, dst_v, twidth, c);
src_argb4444 += twidth * 2;
dst_u += twidth / 2;
dst_v += twidth / 2;
width -= twidth;
}
}
#if defined(HAS_ARGBTOYMATRIXROW_AVX2)
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
// AVX2 variant of the ARGB1555 to Y conversion with constants |c|.
// Each chunk of at most MAXTWIDTH pixels is expanded to ARGB in a stack
// buffer with ARGB1555ToARGBRow_AVX2 and then passed to
// ARGBToYMatrixRow_AVX2.
void ARGB1555ToYMatrixRow_AVX2(const uint8_t* src_argb1555,
                               uint8_t* dst_y,
                               int width,
                               const struct ArgbConstants* c) {
  // Scratch buffer for one chunk of ARGB (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB1555ToARGBRow_AVX2(src_argb1555, argb_tmp, chunk);
    ARGBToYMatrixRow_AVX2(argb_tmp, dst_y, chunk, c);
    src_argb1555 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_y += chunk;             // One Y byte per pixel.
  }
}
#endif  // HAS_ARGB1555TOARGBROW_AVX2
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
// AVX2 variant of the ARGB4444 to Y conversion with constants |c|.
// Each chunk of at most MAXTWIDTH pixels is expanded to ARGB in a stack
// buffer with ARGB4444ToARGBRow_AVX2 and then passed to
// ARGBToYMatrixRow_AVX2.
void ARGB4444ToYMatrixRow_AVX2(const uint8_t* src_argb4444,
                               uint8_t* dst_y,
                               int width,
                               const struct ArgbConstants* c) {
  // Scratch buffer for one chunk of ARGB (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB4444ToARGBRow_AVX2(src_argb4444, argb_tmp, chunk);
    ARGBToYMatrixRow_AVX2(argb_tmp, dst_y, chunk, c);
    src_argb4444 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_y += chunk;             // One Y byte per pixel.
  }
}
#endif  // HAS_ARGB4444TOARGBROW_AVX2
#endif
#if defined(HAS_ARGBTOUVMATRIXROW_AVX2)
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
// AVX2 variant of the ARGB1555 to U/V conversion with constants |c|.
// Reads the row at |src_argb1555| and the row |src_stride_argb1555| bytes
// after it. Each chunk of at most MAXTWIDTH pixels of both rows is expanded
// to ARGB in a stack buffer and then passed to ARGBToUVMatrixRow_AVX2.
void ARGB1555ToUVMatrixRow_AVX2(const uint8_t* src_argb1555,
                                int src_stride_argb1555,
                                uint8_t* dst_u,
                                uint8_t* dst_v,
                                int width,
                                const struct ArgbConstants* c) {
  // Scratch buffer for two ARGB rows stored back to back.
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4 * 2]);
  // Byte offset of the second ARGB row inside the scratch buffer.
  const int tmp_stride = MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB1555ToARGBRow_AVX2(src_argb1555, argb_tmp, chunk);
    ARGB1555ToARGBRow_AVX2(src_argb1555 + src_stride_argb1555,
                           argb_tmp + tmp_stride, chunk);
    ARGBToUVMatrixRow_AVX2(argb_tmp, tmp_stride, dst_u, dst_v, chunk, c);
    src_argb1555 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_u += chunk / 2;         // U and V advance by half the chunk width.
    dst_v += chunk / 2;
  }
}
#endif  // HAS_ARGB1555TOARGBROW_AVX2
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
// AVX2 variant of the ARGB4444 to U/V conversion with constants |c|.
// Reads the row at |src_argb4444| and the row |src_stride_argb4444| bytes
// after it. Each chunk of at most MAXTWIDTH pixels of both rows is expanded
// to ARGB in a stack buffer and then passed to ARGBToUVMatrixRow_AVX2.
void ARGB4444ToUVMatrixRow_AVX2(const uint8_t* src_argb4444,
                                int src_stride_argb4444,
                                uint8_t* dst_u,
                                uint8_t* dst_v,
                                int width,
                                const struct ArgbConstants* c) {
  // Scratch buffer for two ARGB rows stored back to back.
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4 * 2]);
  // Byte offset of the second ARGB row inside the scratch buffer.
  const int tmp_stride = MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB4444ToARGBRow_AVX2(src_argb4444, argb_tmp, chunk);
    ARGB4444ToARGBRow_AVX2(src_argb4444 + src_stride_argb4444,
                           argb_tmp + tmp_stride, chunk);
    ARGBToUVMatrixRow_AVX2(argb_tmp, tmp_stride, dst_u, dst_v, chunk, c);
    src_argb4444 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_u += chunk / 2;         // U and V advance by half the chunk width.
    dst_v += chunk / 2;
  }
}
#endif  // HAS_ARGB4444TOARGBROW_AVX2
#endif
#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON)
// NEON variant of the ARGB1555 to Y conversion with constants |c|.
// Each chunk of at most MAXTWIDTH pixels is expanded to ARGB in a stack
// buffer with ARGB1555ToARGBRow_NEON and then passed to
// ARGBToYMatrixRow_NEON.
void ARGB1555ToYMatrixRow_NEON(const uint8_t* src_argb1555,
                               uint8_t* dst_y,
                               int width,
                               const struct ArgbConstants* c) {
  // Scratch buffer for one chunk of ARGB (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB1555ToARGBRow_NEON(src_argb1555, argb_tmp, chunk);
    ARGBToYMatrixRow_NEON(argb_tmp, dst_y, chunk, c);
    src_argb1555 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_y += chunk;             // One Y byte per pixel.
  }
}
#endif  // HAS_ARGBTOYMATRIXROW_NEON && HAS_ARGB1555TOARGBROW_NEON
#if defined(HAS_ARGBTOYMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON)
// NEON variant of the ARGB4444 to Y conversion with constants |c|.
// Each chunk of at most MAXTWIDTH pixels is expanded to ARGB in a stack
// buffer with ARGB4444ToARGBRow_NEON and then passed to
// ARGBToYMatrixRow_NEON.
void ARGB4444ToYMatrixRow_NEON(const uint8_t* src_argb4444,
                               uint8_t* dst_y,
                               int width,
                               const struct ArgbConstants* c) {
  // Scratch buffer for one chunk of ARGB (4 bytes per pixel).
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB4444ToARGBRow_NEON(src_argb4444, argb_tmp, chunk);
    ARGBToYMatrixRow_NEON(argb_tmp, dst_y, chunk, c);
    src_argb4444 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_y += chunk;             // One Y byte per pixel.
  }
}
#endif  // HAS_ARGBTOYMATRIXROW_NEON && HAS_ARGB4444TOARGBROW_NEON
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB1555TOARGBROW_NEON)
// NEON variant of the ARGB1555 to U/V conversion with constants |c|.
// Reads the row at |src_argb1555| and the row |src_stride_argb1555| bytes
// after it. Each chunk of at most MAXTWIDTH pixels of both rows is expanded
// to ARGB in a stack buffer and then passed to ARGBToUVMatrixRow_NEON.
void ARGB1555ToUVMatrixRow_NEON(const uint8_t* src_argb1555,
                                int src_stride_argb1555,
                                uint8_t* dst_u,
                                uint8_t* dst_v,
                                int width,
                                const struct ArgbConstants* c) {
  // Scratch buffer for two ARGB rows stored back to back.
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4 * 2]);
  // Byte offset of the second ARGB row inside the scratch buffer.
  const int tmp_stride = MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB1555ToARGBRow_NEON(src_argb1555, argb_tmp, chunk);
    ARGB1555ToARGBRow_NEON(src_argb1555 + src_stride_argb1555,
                           argb_tmp + tmp_stride, chunk);
    ARGBToUVMatrixRow_NEON(argb_tmp, tmp_stride, dst_u, dst_v, chunk, c);
    src_argb1555 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_u += chunk / 2;         // U and V advance by half the chunk width.
    dst_v += chunk / 2;
  }
}
#endif  // HAS_ARGBTOUVMATRIXROW_NEON && HAS_ARGB1555TOARGBROW_NEON
#if defined(HAS_ARGBTOUVMATRIXROW_NEON) && defined(HAS_ARGB4444TOARGBROW_NEON)
// NEON variant of the ARGB4444 to U/V conversion with constants |c|.
// Reads the row at |src_argb4444| and the row |src_stride_argb4444| bytes
// after it. Each chunk of at most MAXTWIDTH pixels of both rows is expanded
// to ARGB in a stack buffer and then passed to ARGBToUVMatrixRow_NEON.
void ARGB4444ToUVMatrixRow_NEON(const uint8_t* src_argb4444,
                                int src_stride_argb4444,
                                uint8_t* dst_u,
                                uint8_t* dst_v,
                                int width,
                                const struct ArgbConstants* c) {
  // Scratch buffer for two ARGB rows stored back to back.
  SIMD_ALIGNED(uint8_t argb_tmp[MAXTWIDTH * 4 * 2]);
  // Byte offset of the second ARGB row inside the scratch buffer.
  const int tmp_stride = MAXTWIDTH * 4;
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    ARGB4444ToARGBRow_NEON(src_argb4444, argb_tmp, chunk);
    ARGB4444ToARGBRow_NEON(src_argb4444 + src_stride_argb4444,
                           argb_tmp + tmp_stride, chunk);
    ARGBToUVMatrixRow_NEON(argb_tmp, tmp_stride, dst_u, dst_v, chunk, c);
    src_argb4444 += chunk * 2;  // Source advances 2 bytes per pixel.
    dst_u += chunk / 2;         // U and V advance by half the chunk width.
    dst_v += chunk / 2;
  }
}
#endif  // HAS_ARGBTOUVMATRIXROW_NEON && HAS_ARGB4444TOARGBROW_NEON
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv

File diff suppressed because it is too large Load Diff

View File

@ -2027,12 +2027,10 @@ struct ArgbConstants {
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
128,
0};
128,
0};
static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0},
128,
0};
static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
@ -2041,19 +2039,19 @@ static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0},
// Add 16.5 = 0x1080
static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
0x1080,
0};
0x1080,
0};
static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0},
0x1080,
0};
0x1080,
0};
#endif // ArgbConstants
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_LASX(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
int32_t shuff[8] = {0, 4, 1, 5, 2, 6, 3, 7};
asm volatile(
"xvldrepl.b $xr0, %3, 0 \n\t" // load rgbconstants
@ -2218,14 +2216,18 @@ static void RGBToYMatrixRow_LASX(const uint8_t* src_rgba,
"xvst $xr10, %1, 0 \n\t"
"addi.d %1, %1, 32 \n\t"
"bnez %2, 1b \n\t"
: "+&r"(src_rgba), // %0
"+&r"(dst_y), // %1
"+&r"(width) // %2
: "r"(c), // %3
"r"(shuff) // %4
: "+&r"(src_rgba), // %0
"+&r"(dst_y), // %1
"+&r"(width) // %2
: "r"(c), // %3
"r"(shuff) // %4
: "memory");
}
void ARGBToUVJRow_LASX(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,

View File

@ -2812,12 +2812,10 @@ struct ArgbConstants {
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
128,
0};
128,
0};
static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0},
128,
0};
static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
@ -2826,19 +2824,19 @@ static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0},
// Add 16.5 = 0x1080
static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
0x1080,
0};
0x1080,
0};
static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0},
0x1080,
0};
0x1080,
0};
#endif // ArgbConstants
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_LSX(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
asm volatile(
"vldrepl.b $vr0, %3, 0 \n\t" // load rgbconstants
"vldrepl.b $vr1, %3, 1 \n\t" // load rgbconstants
@ -2989,14 +2987,18 @@ static void RGBToYMatrixRow_LSX(const uint8_t* src_rgba,
"vst $vr10, %1, 0 \n\t"
"addi.d %1, %1, 16 \n\t"
"bnez %2, 1b \n\t"
: "+&r"(src_rgba), // %0
"+&r"(dst_y), // %1
"+&r"(width) // %2
: "r"(c), // %3
"r"(shuff) // %4
: "+&r"(src_rgba), // %0
"+&r"(dst_y), // %1
"+&r"(width) // %2
: "r"(c), // %3
"r"(shuff) // %4
: "memory");
}
// undef for unified sources build
#undef YUVTORGB_SETUP
#undef READYUV422_D

View File

@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_from_argb.h" // For ArgbConstants
#include "libyuv/row.h"
#include "libyuv/convert_from_argb.h" // For ArgbConstants
#ifdef __cplusplus
namespace libyuv {
@ -272,7 +272,7 @@ void I422ToRGBARow_NEON(const uint8_t* src_y,
"subs %[width], %[width], #8 \n" //
YUVTORGB //
RGBTORGB8 //
STORERGBA //
STORERGBA //
"bgt 1b \n"
: [src_y] "+r"(src_y), // %[src_y]
[src_u] "+r"(src_u), // %[src_u]
@ -325,8 +325,9 @@ void I422ToRGB565Row_NEON(const uint8_t* src_y,
YUVTORGB_SETUP
"vmov.u8 d6, #255 \n"
"1: \n" //
READYUV422 "subs %[width], %[width], #8 \n" YUVTORGB
RGBTORGB8 ARGBTORGB565
READYUV422
"subs %[width], %[width], #8 \n" YUVTORGB RGBTORGB8
ARGBTORGB565
"vst1.8 {q2}, [%[dst_rgb565]]! \n" // store 8 pixels RGB565.
"bgt 1b \n"
: [src_y] "+r"(src_y), // %[src_y]
@ -1847,54 +1848,45 @@ void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
int width,
const struct ArgbConstants* c) {
asm volatile(
"vld1.8 {d24}, [%4] \n" // load kRGBToU
"vld1.8 {d25}, [%5] \n" // load kRGBToV
"vld1.16 {d26[0]}, [%6] \n" // load kAddUV[0]
"vmovl.s8 q10, d24 \n" // U coeffs (8 shorts)
"vmovl.s8 q11, d25 \n" // V coeffs (8 shorts)
"vdup.16 q6, d26[0] \n" // bias
"vld1.8 {d16}, [%4] \n" // load kRGBToU
"vld1.8 {d17}, [%5] \n" // load kRGBToV
"vld1.16 {d18[0]}, [%6] \n" // load kAddUV[0]
"vabs.s8 d16, d16 \n" // BU, GU, RU
"vabs.s8 d17, d17 \n" // BV, GV, RV
"vdup.8 d20, d16[0] \n" // BU
"vdup.8 d21, d16[1] \n" // GU
"vdup.8 d22, d16[2] \n" // RU
"vdup.8 d23, d17[0] \n" // BV
"vdup.8 d24, d17[1] \n" // GV
"vdup.8 d25, d17[2] \n" // RV
"vdup.16 q15, d18[0] \n" // kAddUV
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vmull.u8 q2, d0, d20 \n" // B * BU
"vmlsl.u8 q2, d1, d21 \n" // - G * GU
"vmlsl.u8 q2, d2, d22 \n" // - R * RU
"vmovl.u8 q4, d0 \n" // B
"vmovl.u8 q5, d1 \n" // G
"vmovl.u8 q7, d2 \n" // R
"vmovl.u8 q8, d3 \n" // A
"vmull.u8 q3, d2, d25 \n" // R * RV
"vmlsl.u8 q3, d1, d24 \n" // - G * GV
"vmlsl.u8 q3, d0, d23 \n" // - B * BV
"vdup.16 q12, d20[0] \n"
"vmul.s16 q2, q4, q12 \n" // U = B * U0
"vdup.16 q12, d20[1] \n"
"vmla.s16 q2, q5, q12 \n" // U += G * U1
"vdup.16 q12, d20[2] \n"
"vmla.s16 q2, q7, q12 \n" // U += R * U2
"vdup.16 q12, d20[3] \n"
"vmla.s16 q2, q8, q12 \n" // U += A * U3
"vdup.16 q12, d22[0] \n"
"vmul.s16 q3, q4, q12 \n" // V = B * V0
"vdup.16 q12, d22[1] \n"
"vmla.s16 q3, q5, q12 \n" // V += G * V1
"vdup.16 q12, d22[2] \n"
"vmla.s16 q3, q7, q12 \n" // V += R * V2
"vdup.16 q12, d22[3] \n"
"vmla.s16 q3, q8, q12 \n" // V += A * V3
"vsubhn.s16 d0, q6, q2 \n" // 128.0 - U
"vsubhn.s16 d1, q6, q3 \n" // 128.0 - V
"vaddhn.u16 d0, q2, q15 \n" // signed -> unsigned
"vaddhn.u16 d1, q3, q15 \n"
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
: "r"(&c->kRGBToU), // %4
"r"(&c->kRGBToV), // %5
"r"(&c->kAddUV) // %6
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
"q10", "q11", "q12");
: "+r"(src_argb), // %0
"+r"(dst_u), // %1
"+r"(dst_v), // %2
"+r"(width) // %3
: "r"(&c->kRGBToU), // %4
"r"(&c->kRGBToV), // %5
"r"(&c->kAddUV) // %6
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11",
"q12", "q13", "q14", "q15");
}
void ARGBToUV444Row_NEON(const uint8_t* src_argb,
@ -1911,6 +1903,7 @@ void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width, &kArgbJPEGConstants);
}
// clang-format off
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#define RGBTOUV(QB, QG, QR) \
@ -1932,68 +1925,61 @@ void ARGBToUVMatrixRow_NEON(const uint8_t* src_argb,
int width,
const struct ArgbConstants* c) {
const uint8_t* src_argb_1 = src_argb + src_stride_argb;
asm volatile(
"vld1.8 {d24}, [%5] \n" // load kRGBToU (8 bytes,
// only 4 used)
"vld1.8 {d25}, [%6] \n" // load kRGBToV
"vmovl.s8 q14, d24 \n" // U coeffs in d28
"vmovl.s8 q15, d25 \n" // V coeffs in d30
"vmov.u16 q11, #0x8000 \n" // 128.0 bias
asm volatile (
"vld1.8 {d18}, [%5] \n" // load kRGBToU
"vld1.8 {d19}, [%6] \n" // load kRGBToV
"vmovl.s8 q8, d18 \n" // U coeffs in q8 (d16, d17)
"vmovl.s8 q9, d19 \n" // V coeffs in q9 (d18, d19)
"vdup.16 q10, d16[0] \n" // U0
"vdup.16 q11, d16[1] \n" // U1
"vdup.16 q12, d16[2] \n" // U2
"vdup.16 q13, d18[0] \n" // V0
"vdup.16 q14, d18[1] \n" // V1
"vdup.16 q15, d18[2] \n" // V2
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB
// pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"subs %4, %4, #16 \n" // 16 processed per loop.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n"
"vpadal.u8 q0, q4 \n" // B
"vpadal.u8 q1, q5 \n" // G
"vpadal.u8 q2, q6 \n" // R
"vpadal.u8 q3, q7 \n" // A
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels.
"vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #2 \n" // average of 4
"vrshr.u16 q1, q1, #2 \n"
"vrshr.u16 q2, q2, #2 \n"
"vrshr.u16 q3, q3, #2 \n"
"vdup.16 q12, d28[0] \n"
"vmul.s16 q8, q0, q12 \n" // U = B * U0
"vdup.16 q12, d28[1] \n"
"vmla.s16 q8, q1, q12 \n" // U += G * U1
"vdup.16 q12, d28[2] \n"
"vmov.u16 q3, #0x8000 \n" // 128.0
"vmul.s16 q8, q0, q10 \n" // U = B * U0
"vmla.s16 q8, q1, q11 \n" // U += G * U1
"vmla.s16 q8, q2, q12 \n" // U += R * U2
"vdup.16 q12, d28[3] \n"
"vmla.s16 q8, q3, q12 \n" // U += A * U3
"vdup.16 q12, d30[0] \n"
"vmul.s16 q9, q0, q12 \n" // V = B * V0
"vdup.16 q12, d30[1] \n"
"vmla.s16 q9, q1, q12 \n" // V += G * V1
"vdup.16 q12, d30[2] \n"
"vmla.s16 q9, q2, q12 \n" // V += R * V2
"vdup.16 q12, d30[3] \n"
"vmla.s16 q9, q3, q12 \n" // V += A * V3
"vmul.s16 q9, q0, q13 \n" // V = B * V0
"vmla.s16 q9, q1, q14 \n" // V += G * V1
"vmla.s16 q9, q2, q15 \n" // V += R * V2
"vsubhn.s16 d0, q11, q8 \n" // 128.0 - U
"vsubhn.s16 d1, q11, q9 \n" // 128.0 - V
"vsubhn.s16 d0, q3, q8 \n" // 128.0 - U
"vsubhn.s16 d1, q3, q9 \n" // 128.0 - V
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(src_argb_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(width) // %4
: "r"(&c->kRGBToU), // %5
"r"(&c->kRGBToV) // %6
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8",
"q9", "q11", "q12", "q14", "q15");
: "+r"(src_argb), // %0
"+r"(src_argb_1), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(width) // %4
: "r"(&c->kRGBToU), // %5
"r"(&c->kRGBToV) // %6
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void ARGBToUVRow_NEON(const uint8_t* src_argb,
@ -2226,8 +2212,44 @@ void BGRAToUVRow_NEON(const uint8_t* src_bgra,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_NEON(src_bgra, src_stride_bgra, dst_u, dst_v, width,
&kBgraI601Constants);
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_bgra
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8000 \n" // 128.0
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels.
"subs %4, %4, #16 \n" // 16 processed per loop.
"vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels.
"vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts.
"vrshr.u16 q1, q1, #2 \n" // average of 4
"vrshr.u16 q2, q2, #2 \n"
"vrshr.u16 q3, q3, #2 \n"
RGBTOUV(q3, q2, q1)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_bgra), // %0
"+r"(src_stride_bgra), // %1
"+r"(dst_u), // %2-
"+r"(dst_v), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void ABGRToUVRow_NEON(const uint8_t* src_abgr,
@ -2235,8 +2257,44 @@ void ABGRToUVRow_NEON(const uint8_t* src_abgr,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_NEON(src_abgr, src_stride_abgr, dst_u, dst_v, width,
&kAbgrI601Constants);
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_abgr
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8000 \n" // 128.0
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels.
"subs %4, %4, #16 \n" // 16 processed per loop.
"vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels.
"vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #2 \n" // average of 4
"vrshr.u16 q1, q1, #2 \n"
"vrshr.u16 q2, q2, #2 \n"
RGBTOUV(q2, q1, q0)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_abgr), // %0
"+r"(src_stride_abgr), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void RGBAToUVRow_NEON(const uint8_t* src_rgba,
@ -2244,8 +2302,44 @@ void RGBAToUVRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_u,
uint8_t* dst_v,
int width) {
ARGBToUVMatrixRow_NEON(src_rgba, src_stride_rgba, dst_u, dst_v, width,
&kRgbaI601Constants);
asm volatile (
"add %1, %0, %1 \n" // src_stride + src_rgba
"vmov.s16 q10, #112 \n" // UB/VR 0.875 coefficient
"vmov.s16 q11, #74 \n" // UG -0.5781 coefficient
"vmov.s16 q12, #38 \n" // UR -0.2969 coefficient
"vmov.s16 q13, #18 \n" // VB -0.1406 coefficient
"vmov.s16 q14, #94 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8000 \n" // 128.0
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels.
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels.
"subs %4, %4, #16 \n" // 16 processed per loop.
"vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts.
"vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels.
"vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels.
"vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts.
"vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts.
"vrshr.u16 q0, q0, #2 \n" // average of 4
"vrshr.u16 q1, q1, #2 \n"
"vrshr.u16 q2, q2, #2 \n"
RGBTOUV(q0, q1, q2)
"vst1.8 {d0}, [%2]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%3]! \n" // store 8 pixels V.
"bgt 1b \n"
: "+r"(src_rgba), // %0
"+r"(src_stride_rgba), // %1
"+r"(dst_u), // %2
"+r"(dst_v), // %3
"+r"(width) // %4
:
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,
@ -2703,20 +2797,19 @@ void AB64ToARGBRow_NEON(const uint16_t* src_ab64,
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
asm volatile(
"vld1.8 {d24}, [%3] \n" // load kRGBToY
"vld1.16 {d25[0]}, [%4] \n" // load kAddY[0]
"vdup.8 d20, d24[0] \n" // B
"vdup.8 d21, d24[1] \n" // G
"vdup.8 d22, d24[2] \n" // R
"vdup.8 d23, d24[3] \n" // A
"vdup.16 q12, d25[0] \n" // bias
"vld1.8 {d16}, [%3] \n" // load kRGBToY
"vld1.16 {d18[0]}, [%4] \n" // load kAddY[0]
"vdup.8 d20, d16[0] \n" // BY
"vdup.8 d21, d16[1] \n" // GY
"vdup.8 d22, d16[2] \n" // RY
"vdup.16 q12, d18[0] \n" // AY
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 pixels of ARGB
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 pixels
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of ARGB
"vld4.8 {d1, d3, d5, d7}, [%0]! \n"
"subs %1, %1, #16 \n" // 16 processed per loop.
"vmull.u8 q8, d0, d20 \n" // B
"vmull.u8 q9, d1, d20 \n"
@ -2724,8 +2817,6 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
"vmlal.u8 q9, d3, d21 \n"
"vmlal.u8 q8, d4, d22 \n" // R
"vmlal.u8 q9, d5, d22 \n"
"vmlal.u8 q8, d6, d23 \n" // A
"vmlal.u8 q9, d7, d23 \n"
"vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y
"vaddhn.u16 d1, q9, q12 \n"
"vst1.8 {d0, d1}, [%2]! \n" // store 16 pixels Y.
@ -2735,8 +2826,8 @@ void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
"+r"(dst_y) // %2
: "r"(&c->kRGBToY), // %3
"r"(&c->kAddY) // %4
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12",
"d24", "d25");
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22",
"q12");
}
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
@ -2755,33 +2846,65 @@ void ABGRToYJRow_NEON(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_abgr, dst_yj, width, &kAbgrJPEGConstants);
}
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
// Same code as ARGB, except the LD4
static void RGBAToYMatrixRow_NEON(const uint8_t* src_rgba,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
asm volatile(
"vld1.8 {d16}, [%3] \n" // load kRGBToY
"vld1.16 {d18[0]}, [%4] \n" // load kAddY[0]
"vdup.8 d20, d16[0] \n" // BY
"vdup.8 d21, d16[1] \n" // GY
"vdup.8 d22, d16[2] \n" // RY
"vdup.16 q12, d18[0] \n" // AY
"1: \n"
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 16 pixels of RGBA
"vld4.8 {d1, d3, d5, d7}, [%0]! \n"
"subs %2, %2, #16 \n" // 16 processed per loop.
"vmull.u8 q8, d2, d20 \n" // B
"vmull.u8 q9, d3, d20 \n"
"vmlal.u8 q8, d4, d21 \n" // G
"vmlal.u8 q9, d5, d21 \n"
"vmlal.u8 q8, d6, d22 \n" // R
"vmlal.u8 q9, d7, d22 \n"
"vaddhn.u16 d0, q8, q12 \n" // 16 bit to 8 bit Y
"vaddhn.u16 d1, q9, q12 \n"
"vst1.8 {d0, d1}, [%1]! \n" // store 16 pixels Y.
"bgt 1b \n"
: "+r"(src_rgba), // %0
"+r"(dst_y), // %1
"+r"(width) // %2
: "r"(&c->kRGBToY), // %3
"r"(&c->kAddY) // %4
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22",
"q12");
}
void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_rgba, dst_y, width, &kRgbaI601Constants);
RGBAToYMatrixRow_NEON(src_rgba, dst_y, width, &kArgbI601Constants);
}
void RGBAToYJRow_NEON(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_rgba, dst_yj, width, &kRgbaJPEGConstants);
RGBAToYMatrixRow_NEON(src_rgba, dst_yj, width, &kArgbJPEGConstants);
}
void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_bgra, dst_y, width, &kBgraI601Constants);
}
void BGRAToYJRow_NEON(const uint8_t* src_bgra, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_NEON(src_bgra, dst_yj, width, &kBgraJPEGConstants);
RGBAToYMatrixRow_NEON(src_bgra, dst_y, width, &kAbgrI601Constants);
}
void RGBToYMatrixRow_NEON(const uint8_t* src_rgb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
asm volatile(
"vld1.8 {d24}, [%3] \n" // load kRGBToY
"vld1.16 {d25[0]}, [%4] \n" // load kAddY[0]
"vdup.8 d20, d24[0] \n" // BY
"vdup.8 d21, d24[1] \n" // GY
"vdup.8 d22, d24[2] \n" // RY
"vdup.16 q12, d25[0] \n" // AY
"vld1.8 {d16}, [%3] \n" // load kRGBToY
"vld1.16 {d18[0]}, [%4] \n" // load kAddY[0]
"vdup.8 d20, d16[0] \n" // BY
"vdup.8 d21, d16[1] \n" // GY
"vdup.8 d22, d16[2] \n" // RY
"vdup.16 q12, d18[0] \n" // AY
"1: \n"
"vld3.8 {d2, d4, d6}, [%0]! \n" // load 16 pixels of
// RGB24.
@ -2802,10 +2925,14 @@ void RGBToYMatrixRow_NEON(const uint8_t* src_rgb,
"+r"(width) // %2
: "r"(&c->kRGBToY), // %3
"r"(&c->kAddY) // %4
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12",
"d24", "d25");
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "d20", "d21", "d22",
"q12");
}
// Bilinear filter 16x2 -> 16x1
void InterpolateRow_NEON(uint8_t* dst_ptr,
const uint8_t* src_ptr,

File diff suppressed because it is too large Load Diff

View File

@ -1249,22 +1249,16 @@ void MergeUVRow_RVV(const uint8_t* src_u,
}
#endif
// RGB to JPeg coefficients
// B * 0.1140 coefficient = 29
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
{0},
{0},
{128},
{0}};
static const struct ArgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, {0}, {0}, {128}, {0}};
static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0},
{0},
{0},
{128},
{0}};
static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, {0}, {0}, {128}, {0}};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
@ -1272,24 +1266,16 @@ static const struct ArgbConstants kRawJPEGConstants = {{77, 150, 29, 0},
// R * 0.2578 coefficient = 66
// Add 16.5 = 0x1080
static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
{0},
{0},
{0x1080},
{0}};
static const struct ArgbConstants kRgb24I601Constants = {{25, 129, 66, 0}, {0}, {0}, {0x1080}, {0}};
static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0},
{0},
{0},
{0x1080},
{0}};
static const struct ArgbConstants kRawI601Constants = {{66, 129, 25, 0}, {0}, {0}, {0x1080}, {0}};
// ARGB expects first 3 values to contain RGB and 4th value is ignored
#ifdef HAS_ARGBTOYMATRIXROW_RVV
void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
assert(width != 0);
size_t w = (size_t)width;
vuint8m2_t v_by, v_gy, v_ry; // vectors are to store RGBToY constant

View File

@ -1127,10 +1127,9 @@ __arm_locally_streaming void ARGBToUVMatrixRow_SME(
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
int8_t uvconstants[8] = {(int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1],
(int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3],
(int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1],
(int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]};
int8_t uvconstants[8] = {
(int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3],
(int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]};
ARGBToUVMatrixRow_SVE_SC(src_argb, src_stride_argb, dst_u, dst_v, width,
uvconstants);
}

View File

@ -223,10 +223,9 @@ void ARGBToUVMatrixRow_SVE2(const uint8_t* src_argb,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
int8_t uvconstants[8] = {(int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1],
(int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3],
(int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1],
(int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]};
int8_t uvconstants[8] = {
(int8_t)c->kRGBToU[0], (int8_t)c->kRGBToU[1], (int8_t)c->kRGBToU[2], (int8_t)c->kRGBToU[3],
(int8_t)c->kRGBToV[0], (int8_t)c->kRGBToV[1], (int8_t)c->kRGBToV[2], (int8_t)c->kRGBToV[3]};
ARGBToUVMatrixRow_SVE_SC(src_argb, src_stride_argb, dst_u, dst_v, width,
uvconstants);
}

View File

@ -8,19 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_from_argb.h" // For ArgbConstants
#include "libyuv/row.h"
#include "libyuv/convert_from_argb.h" // For ArgbConstants
// This module is for Visual C 32/64 bit
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || \
defined(_M_X86)) && \
((defined(_MSC_VER) && !defined(__clang__)) || \
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__) || \
defined(_M_X64) || defined(_M_X86)) && \
((defined(_MSC_VER) && !defined(__clang__)) || \
defined(LIBYUV_ENABLE_ROWWIN))
#include <emmintrin.h>
#include <immintrin.h> // For AVX2 intrinsics
#include <tmmintrin.h> // For _mm_maddubs_epi16
#include <immintrin.h> // For AVX2 intrinsics
#ifdef __cplusplus
namespace libyuv {
@ -102,91 +102,42 @@ extern "C" {
_mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1); \
dst_argb += 32;
#if defined(HAS_ARGBTOYMATRIXROW_AVX2)
#if defined(HAS_I422TOARGBROW_SSSE3)
#endif
#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
#endif
#if defined(HAS_I444TOARGBROW_SSSE3)
#endif
#if defined(HAS_I444ALPHATOARGBROW_SSSE3)
#endif
#if defined(HAS_ARGBTOYROW_AVX2)
#if defined(__clang__) || defined(__GNUC__)
#define LIBYUV_TARGET_AVX2 __attribute__((target("avx2")))
#define LIBYUV_TARGET_AVX512BW \
__attribute__((target("avx512bw,avx512vl,avx512f")))
#define LIBYUV_TARGET_AVX512BW __attribute__((target("avx512bw,avx512vl,avx512f")))
#else
#define LIBYUV_TARGET_AVX2
#define LIBYUV_TARGET_AVX512BW
#endif
// Convert 32 ARGB pixels (128 bytes) to 32 UV444 values.
// Each loop pass reads 128 bytes from src_argb and stores 32 bytes to each of
// dst_u and dst_v. The loop runs while width > 0 in steps of 32, so this
// function has no partial-group handling of its own.
// The U and V weights are read as 16 bytes each from c->kRGBToU and
// c->kRGBToV.
#if defined(HAS_ARGBTOYMATRIXROW_AVX2) || defined(HAS_ARGBTOUV444MATRIXROW_AVX2)
LIBYUV_TARGET_AVX2
void ARGBToUV444MatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_u,
uint8_t* dst_v,
int width,
const struct ArgbConstants* c) {
// Broadcast the 16 weight bytes to both 128-bit lanes.
__m256i ymm_u =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU));
__m256i ymm_v =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToV));
// 0x8000 in every 16-bit element; the weighted sums are subtracted from it.
__m256i ymm5 = _mm256_set1_epi16((short)0x8000);
// Dword order that undoes the per-lane interleaving left by hadd + packus.
__m256i perm_mask = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
while (width > 0) {
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb);
__m256i ymm1 = _mm256_loadu_si256((const __m256i*)(src_argb + 32));
__m256i ymm2 = _mm256_loadu_si256((const __m256i*)(src_argb + 64));
__m256i ymm3 = _mm256_loadu_si256((const __m256i*)(src_argb + 96));
src_argb += 128;
// Multiply unsigned source bytes by signed weight bytes and add adjacent
// byte pairs, giving two 16-bit partial sums per 4-byte pixel.
__m256i ymm0_u = _mm256_maddubs_epi16(ymm0, ymm_u);
__m256i ymm1_u = _mm256_maddubs_epi16(ymm1, ymm_u);
__m256i ymm2_u = _mm256_maddubs_epi16(ymm2, ymm_u);
__m256i ymm3_u = _mm256_maddubs_epi16(ymm3, ymm_u);
__m256i ymm0_v = _mm256_maddubs_epi16(ymm0, ymm_v);
__m256i ymm1_v = _mm256_maddubs_epi16(ymm1, ymm_v);
__m256i ymm2_v = _mm256_maddubs_epi16(ymm2, ymm_v);
__m256i ymm3_v = _mm256_maddubs_epi16(ymm3, ymm_v);
// Add the two partial sums of each pixel: one 16-bit value per pixel.
ymm0_u = _mm256_hadd_epi16(ymm0_u, ymm1_u);
ymm2_u = _mm256_hadd_epi16(ymm2_u, ymm3_u);
ymm0_v = _mm256_hadd_epi16(ymm0_v, ymm1_v);
ymm2_v = _mm256_hadd_epi16(ymm2_v, ymm3_v);
// result = 0x8000 - sum, computed in 16 bits.
ymm0_u = _mm256_sub_epi16(ymm5, ymm0_u);
ymm2_u = _mm256_sub_epi16(ymm5, ymm2_u);
ymm0_v = _mm256_sub_epi16(ymm5, ymm0_v);
ymm2_v = _mm256_sub_epi16(ymm5, ymm2_v);
// Keep the high byte of each 16-bit result.
ymm0_u = _mm256_srli_epi16(ymm0_u, 8);
ymm2_u = _mm256_srli_epi16(ymm2_u, 8);
ymm0_v = _mm256_srli_epi16(ymm0_v, 8);
ymm2_v = _mm256_srli_epi16(ymm2_v, 8);
// Narrow to bytes, then reorder dwords so the 32 outputs are in pixel order.
ymm0_u = _mm256_packus_epi16(ymm0_u, ymm2_u);
ymm0_u = _mm256_permutevar8x32_epi32(ymm0_u, perm_mask);
ymm0_v = _mm256_packus_epi16(ymm0_v, ymm2_v);
ymm0_v = _mm256_permutevar8x32_epi32(ymm0_v, perm_mask);
_mm256_storeu_si256((__m256i*)dst_u, ymm0_u);
_mm256_storeu_si256((__m256i*)dst_v, ymm0_v);
dst_u += 32;
dst_v += 32;
width -= 32;
}
}
#endif
LIBYUV_TARGET_AVX2
void ARGBToYMatrixRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
const struct ArgbConstants* c) {
__m256i ymm5 = _mm256_set1_epi8((char)0x80);
__m256i ymm4 =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToY));
__m256i ymm7 =
_mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kAddY));
__m128i kRGBToY = _mm_loadu_si128((const __m128i*)c->kRGBToY);
__m256i ymm4 = _mm256_broadcastsi128_si256(kRGBToY);
__m128i kAddY = _mm_loadu_si128((const __m128i*)c->kAddY);
__m256i ymm7 = _mm256_broadcastsi128_si256(kAddY);
__m256i ymm6 = _mm256_maddubs_epi16(ymm4, ymm5);
ymm6 = _mm256_hadd_epi16(ymm6, ymm6);
ymm7 = _mm256_sub_epi16(ymm7, ymm6);
@ -266,33 +217,27 @@ void BGRAToYRow_AVX2(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
LIBYUV_TARGET_AVX2
void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
__m256i ymm_alpha = _mm256_set1_epi32(0xff000000);
__m128i shuf_low =
_mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2);
__m128i shuf_high =
_mm_set_epi8(-1, 13, 14, 15, -1, 10, 11, 12, -1, 7, 8, 9, -1, 4, 5, 6);
__m128i shuf_low = _mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2);
__m128i shuf_high = _mm_set_epi8(-1, 13, 14, 15, -1, 10, 11, 12, -1, 7, 8, 9, -1, 4, 5, 6);
__m256i ymm_shuf = _mm256_broadcastsi128_si256(shuf_low);
__m256i ymm_shuf2 = _mm256_broadcastsi128_si256(shuf_high);
while (width > 0) {
__m128i xmm0 = _mm_loadu_si128((const __m128i*)src_raw);
__m256i ymm0 = _mm256_castsi128_si256(xmm0);
ymm0 = _mm256_inserti128_si256(
ymm0, _mm_loadu_si128((const __m128i*)(src_raw + 12)), 1);
ymm0 = _mm256_inserti128_si256(ymm0, _mm_loadu_si128((const __m128i*)(src_raw + 12)), 1);
__m128i xmm1 = _mm_loadu_si128((const __m128i*)(src_raw + 24));
__m256i ymm1 = _mm256_castsi128_si256(xmm1);
ymm1 = _mm256_inserti128_si256(
ymm1, _mm_loadu_si128((const __m128i*)(src_raw + 36)), 1);
ymm1 = _mm256_inserti128_si256(ymm1, _mm_loadu_si128((const __m128i*)(src_raw + 36)), 1);
__m128i xmm2 = _mm_loadu_si128((const __m128i*)(src_raw + 48));
__m256i ymm2 = _mm256_castsi128_si256(xmm2);
ymm2 = _mm256_inserti128_si256(
ymm2, _mm_loadu_si128((const __m128i*)(src_raw + 60)), 1);
ymm2 = _mm256_inserti128_si256(ymm2, _mm_loadu_si128((const __m128i*)(src_raw + 60)), 1);
__m128i xmm3 = _mm_loadu_si128((const __m128i*)(src_raw + 68));
__m256i ymm3 = _mm256_castsi128_si256(xmm3);
ymm3 = _mm256_inserti128_si256(
ymm3, _mm_loadu_si128((const __m128i*)(src_raw + 80)), 1);
ymm3 = _mm256_inserti128_si256(ymm3, _mm_loadu_si128((const __m128i*)(src_raw + 80)), 1);
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf);
@ -318,13 +263,10 @@ void RAWToARGBRow_AVX2(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
#ifdef HAS_RAWTOARGBROW_AVX512BW
LIBYUV_TARGET_AVX512BW
void RGBToARGBRow_AVX512BW(const uint8_t* src_raw,
uint8_t* dst_argb,
const __m128i* shuffler,
int width) {
void RGBToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, const __m128i* shuffler, int width) {
__m512i zmm_alpha = _mm512_set1_epi32(0xff000000);
__m512i zmm_perm =
_mm512_set_epi32(12, 11, 10, 9, 9, 8, 7, 6, 6, 5, 4, 3, 3, 2, 1, 0);
__m512i zmm_perm = _mm512_set_epi32(
12, 11, 10, 9, 9, 8, 7, 6, 6, 5, 4, 3, 3, 2, 1, 0);
__m512i zmm_shuf = _mm512_broadcast_i32x4(_mm_loadu_si128(shuffler));
while (width > 0) {
@ -360,26 +302,20 @@ void RGBToARGBRow_AVX512BW(const uint8_t* src_raw,
}
LIBYUV_TARGET_AVX512BW
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw,
uint8_t* dst_argb,
int width) {
__m128i shuf =
_mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2);
void RAWToARGBRow_AVX512BW(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
__m128i shuf = _mm_set_epi8(-1, 9, 10, 11, -1, 6, 7, 8, -1, 3, 4, 5, -1, 0, 1, 2);
RGBToARGBRow_AVX512BW(src_raw, dst_argb, &shuf, width);
}
LIBYUV_TARGET_AVX512BW
void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24,
uint8_t* dst_argb,
int width) {
__m128i shuf =
_mm_set_epi8(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0);
void RGB24ToARGBRow_AVX512BW(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
__m128i shuf = _mm_set_epi8(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0);
RGBToARGBRow_AVX512BW(src_rgb24, dst_argb, &shuf, width);
}
#endif
#ifdef HAS_ARGBTOUVMATRIXROW_AVX2
LIBYUV_TARGET_AVX2
LIBYUV_TARGET_AVX2 __attribute__((no_sanitize("cfi-icall")))
void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
int src_stride_argb,
uint8_t* dst_u,
@ -389,19 +325,16 @@ void ARGBToUVMatrixRow_AVX2(const uint8_t* src_argb,
__m256i ymm_u = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToU));
__m256i ymm_v = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)c->kRGBToV));
__m256i ymm_0101 = _mm256_set1_epi16(0x0101);
__m256i ymm_shuf =
_mm256_setr_epi8(0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15, 0,
4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15);
__m256i ymm_shuf = _mm256_setr_epi8(0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15,
0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15);
__m256i ymm_8000 = _mm256_set1_epi16((short)0x8000);
__m256i ymm_zero = _mm256_setzero_si256();
while (width > 0) {
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb);
__m256i ymm1 = _mm256_loadu_si256((const __m256i*)(src_argb + 32));
__m256i ymm2 =
_mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb));
__m256i ymm3 =
_mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb + 32));
__m256i ymm2 = _mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb));
__m256i ymm3 = _mm256_loadu_si256((const __m256i*)(src_argb + src_stride_argb + 32));
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf);
@ -470,515 +403,12 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
}
#endif
#ifdef HAS_MIRRORROW_AVX2
// Copies the bytes of |src| to |dst| in reverse order, 32 bytes per pass.
// Reads walk backwards from src + width, writes walk forwards from dst. The
// loop continues while the remaining count is positive, so every pass moves a
// full 32-byte vector.
LIBYUV_TARGET_AVX2
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
  // Byte reversal within one 128-bit lane, applied to both lanes.
  const __m128i lane_reverse =
      _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  const __m256i reverse = _mm256_broadcastsi128_si256(lane_reverse);
  const uint8_t* tail = src + width;
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 32) {
    __m256i block;
    tail -= 32;
    block = _mm256_loadu_si256((const __m256i*)tail);
    block = _mm256_shuffle_epi8(block, reverse);
    // 0x4e swaps the two 128-bit halves, completing the 32-byte reversal.
    block = _mm256_permute4x64_epi64(block, 0x4e);
    _mm256_storeu_si256((__m256i*)dst, block);
    dst += 32;
  }
}
#endif
#ifdef HAS_MIRRORUVROW_AVX2
// Reverses the order of 2-byte pairs in a row while keeping the byte order
// inside each pair. |width| counts pairs; each pass moves 16 pairs (32 bytes),
// reading backwards from src_uv + width * 2 and writing forwards to dst_uv.
LIBYUV_TARGET_AVX2
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  // Pair-wise reversal within one 128-bit lane, applied to both lanes.
  const __m128i lane_pairs =
      _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
  const __m256i reverse_pairs = _mm256_broadcastsi128_si256(lane_pairs);
  const uint8_t* tail = src_uv + width * 2;
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 16) {
    __m256i pairs;
    tail -= 32;
    pairs = _mm256_loadu_si256((const __m256i*)tail);
    pairs = _mm256_shuffle_epi8(pairs, reverse_pairs);
    // 0x4e swaps the two 128-bit halves.
    pairs = _mm256_permute4x64_epi64(pairs, 0x4e);
    _mm256_storeu_si256((__m256i*)dst_uv, pairs);
    dst_uv += 32;
  }
}
#endif
#ifdef HAS_MIRRORSPLITUVROW_AVX2
// Reverses a row of interleaved byte pairs and splits it into two planes:
// even-indexed source bytes go to dst_u and odd-indexed bytes go to dst_v,
// both in reversed order. |width| counts pairs; each pass consumes 32 source
// bytes and produces 16 bytes per destination.
LIBYUV_TARGET_AVX2
void MirrorSplitUVRow_AVX2(const uint8_t* src_uv,
                           uint8_t* dst_u,
                           uint8_t* dst_v,
                           int width) {
  // Per lane: reversed even bytes in the low 8 bytes, reversed odd bytes in
  // the high 8 bytes.
  const __m128i lane_split =
      _mm_setr_epi8(14, 12, 10, 8, 6, 4, 2, 0, 15, 13, 11, 9, 7, 5, 3, 1);
  const __m256i split_reverse = _mm256_broadcastsi128_si256(lane_split);
  const uint8_t* tail = src_uv + width * 2;
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 16) {
    __m256i pairs;
    tail -= 32;
    pairs = _mm256_loadu_si256((const __m256i*)tail);
    pairs = _mm256_shuffle_epi8(pairs, split_reverse);
    // 0x72 orders the 64-bit quarters as [2, 0, 3, 1]: the even-byte halves
    // land in the low 128 bits and the odd-byte halves in the high 128 bits.
    pairs = _mm256_permute4x64_epi64(pairs, 0x72);
    _mm_storeu_si128((__m128i*)dst_u, _mm256_castsi256_si128(pairs));
    _mm_storeu_si128((__m128i*)dst_v, _mm256_extracti128_si256(pairs, 1));
    dst_u += 16;
    dst_v += 16;
  }
}
#endif
#ifdef HAS_RGB24MIRRORROW_AVX2
// Reverses the order of 3-byte pixels in a row. |width| counts pixels. Each
// loop pass handles 32 pixels: it reads a 96-byte group, starting with the
// last group of the row (src_rgb24 + width * 3 - 96) and moving backwards,
// and writes 96 bytes moving forwards through dst_rgb24.
LIBYUV_TARGET_AVX2
void RGB24MirrorRow_AVX2(const uint8_t* src_rgb24,
uint8_t* dst_rgb24,
int width) {
// Per 128-bit lane: output byte 0 is zeroed (-1) and bytes 1..15 hold the
// five 3-byte groups found at input bytes 0..14, in reversed group order.
__m256i shuf0 =
_mm256_setr_epi8(-1, 12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2, -1,
12, 13, 14, 9, 10, 11, 6, 7, 8, 3, 4, 5, 0, 1, 2);
// Output bytes 0..14 hold the five groups found at input bytes 1..15, in
// reversed group order; output byte 15 is zeroed.
__m128i shuf1 =
_mm_setr_epi8(13, 14, 15, 10, 11, 12, 7, 8, 9, 4, 5, 6, 1, 2, 3, -1);
src_rgb24 += width * 3 - 96;
while (width > 0) {
// Loads advance by 15 bytes (five pixels) so each lane starts on a pixel
// boundary; the 16th byte of each load is discarded by shuf0.
__m128i v0_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 0));
__m128i v0_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 15));
__m256i v0 =
_mm256_inserti128_si256(_mm256_castsi128_si256(v0_lo), v0_hi, 1);
__m128i v1_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 30));
__m128i v1_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 45));
__m256i v1 =
_mm256_inserti128_si256(_mm256_castsi128_si256(v1_lo), v1_hi, 1);
__m128i v2_lo = _mm_loadu_si128((const __m128i*)(src_rgb24 + 60));
__m128i v2_hi = _mm_loadu_si128((const __m128i*)(src_rgb24 + 75));
__m256i v2 =
_mm256_inserti128_si256(_mm256_castsi128_si256(v2_lo), v2_hi, 1);
// Last 16 bytes of the group (80..95); supplies the pixels at 90..95.
__m128i v3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 80));
v0 = _mm256_shuffle_epi8(v0, shuf0);
v1 = _mm256_shuffle_epi8(v1, shuf0);
v2 = _mm256_shuffle_epi8(v2, shuf0);
v3 = _mm_shuffle_epi8(v3, shuf1);
// Stores run from the end of the output group towards its start and overlap
// by one byte: the zeroed first byte of each 16-byte store is overwritten by
// the store that follows it, so the store order matters.
_mm_storeu_si128((__m128i*)(dst_rgb24 + 80), _mm256_castsi256_si128(v0));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 65),
_mm256_extracti128_si256(v0, 1));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 50), _mm256_castsi256_si128(v1));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 35),
_mm256_extracti128_si256(v1, 1));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 20), _mm256_castsi256_si128(v2));
_mm_storeu_si128((__m128i*)(dst_rgb24 + 5),
_mm256_extracti128_si256(v2, 1));
// 8-byte store covering output bytes 0..7, including the pad byte at +5.
_mm_storel_epi64((__m128i*)(dst_rgb24 + 0), v3);
src_rgb24 -= 96;
dst_rgb24 += 96;
width -= 32;
}
}
#endif
#ifdef HAS_INTERPOLATEROW_AVX2
// Blends two rows of bytes into dst_ptr:
//   dst = (row0 * (256 - f) + row1 * f + 128) >> 8
// where row0 is src_ptr, row1 is src_ptr + src_stride and
// f = source_y_fraction.
//
// dst_ptr:           output row.
// src_ptr:           first input row; the second row starts src_stride bytes
//                    later.
// width:             number of bytes to produce. Every pass moves a whole
//                    32-byte vector, so a width that is not a multiple of 32
//                    is rounded up by this function.
// source_y_fraction: weight of the second row. The weights are packed into
//                    bytes, so values outside 0..255 are not handled here.
//
// f == 0 is a plain copy and f == 128 uses a rounding byte average; both give
// the same result as the formula above.
LIBYUV_TARGET_AVX2
void InterpolateRow_AVX2(uint8_t* dst_ptr,
                         const uint8_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1 = source_y_fraction;
  int y0 = 256 - y1;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int i;
  if (y1 == 0) {
    for (i = 0; i < width; i += 32) {
      _mm256_storeu_si256((__m256i*)(dst_ptr + i),
                          _mm256_loadu_si256((const __m256i*)(src_ptr + i)));
    }
  } else if (y1 == 128) {
    for (i = 0; i < width; i += 32) {
      __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i));
      __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i));
      _mm256_storeu_si256((__m256i*)(dst_ptr + i),
                          _mm256_avg_epu8(row0, row1));
    }
  } else {
    // Both constants exceed SHRT_MAX; the explicit casts keep the same bit
    // pattern without relying on an implicit narrowing of the argument.
    // Low byte = weight of row0, high byte = weight of row1.
    const __m256i ymm_y = _mm256_set1_epi16((short)((y1 << 8) | y0));
    // 0x80 per byte (bias) and, as a 16-bit add, 128 * 256 + 128: it restores
    // the bias removed below and adds the rounding term.
    const __m256i ymm_8080 = _mm256_set1_epi16((short)0x8080);
    for (i = 0; i < width; i += 32) {
      __m256i row0 = _mm256_loadu_si256((const __m256i*)(src_ptr + i));
      __m256i row1 = _mm256_loadu_si256((const __m256i*)(src_ptr1 + i));
      // Interleave so each 16-bit element holds (row0 byte, row1 byte).
      __m256i low = _mm256_unpacklo_epi8(row0, row1);
      __m256i high = _mm256_unpackhi_epi8(row0, row1);
      // Bias pixels to signed so maddubs (unsigned weights * signed pixels)
      // cannot saturate.
      low = _mm256_sub_epi8(low, ymm_8080);
      high = _mm256_sub_epi8(high, ymm_8080);
      low = _mm256_maddubs_epi16(ymm_y, low);
      high = _mm256_maddubs_epi16(ymm_y, high);
      low = _mm256_add_epi16(low, ymm_8080);
      high = _mm256_add_epi16(high, ymm_8080);
      low = _mm256_srli_epi16(low, 8);
      high = _mm256_srli_epi16(high, 8);
      // unpack and pack are both per-lane, so byte order is preserved.
      _mm256_storeu_si256((__m256i*)(dst_ptr + i),
                          _mm256_packus_epi16(low, high));
    }
  }
  _mm256_zeroupper();
}
#endif
#ifdef HAS_INTERPOLATEROW_16_AVX2
// 16-bit variant of the row blend: combines the row at src_ptr with the row
// src_stride elements later, weighting the second row by
// source_y_fraction / 256 and rounding. |width| counts 16-bit elements; every
// pass moves a whole vector of 16 elements. A fraction of 0 copies the first
// row and a fraction of 128 uses a rounding 16-bit average.
LIBYUV_TARGET_AVX2
void InterpolateRow_16_AVX2(uint16_t* dst_ptr,
                            const uint16_t* src_ptr,
                            ptrdiff_t src_stride,
                            int width,
                            int source_y_fraction) {
  const int frac1 = source_y_fraction;
  const int frac0 = 256 - frac1;
  const uint16_t* next_row = src_ptr + src_stride;
  int x;
  switch (frac1) {
    case 0:
      for (x = 0; x < width; x += 16) {
        const __m256i top = _mm256_loadu_si256((const __m256i*)(src_ptr + x));
        _mm256_storeu_si256((__m256i*)(dst_ptr + x), top);
      }
      break;
    case 128:
      for (x = 0; x < width; x += 16) {
        const __m256i top = _mm256_loadu_si256((const __m256i*)(src_ptr + x));
        const __m256i bottom =
            _mm256_loadu_si256((const __m256i*)(next_row + x));
        _mm256_storeu_si256((__m256i*)(dst_ptr + x),
                            _mm256_avg_epu16(top, bottom));
      }
      break;
    default: {
      // Low 16 bits weight the first row, high 16 bits the second row.
      const __m256i weights = _mm256_set1_epi32((frac1 << 16) | frac0);
      // Bias that turns unsigned samples into signed ones for madd_epi16.
      const __m256i bias = _mm256_set1_epi16((short)0x8000);
      // Undoes the bias (0x8000 * 256) and adds the rounding term.
      const __m256i unbias_round = _mm256_set1_epi32(8388736);  // 0x800000 + 128
      for (x = 0; x < width; x += 16) {
        const __m256i top = _mm256_loadu_si256((const __m256i*)(src_ptr + x));
        const __m256i bottom =
            _mm256_loadu_si256((const __m256i*)(next_row + x));
        __m256i pair_lo = _mm256_unpacklo_epi16(top, bottom);
        __m256i pair_hi = _mm256_unpackhi_epi16(top, bottom);
        __m256i sum_lo;
        __m256i sum_hi;
        pair_lo = _mm256_sub_epi16(pair_lo, bias);
        pair_hi = _mm256_sub_epi16(pair_hi, bias);
        sum_lo = _mm256_madd_epi16(pair_lo, weights);
        sum_hi = _mm256_madd_epi16(pair_hi, weights);
        sum_lo = _mm256_srai_epi32(_mm256_add_epi32(sum_lo, unbias_round), 8);
        sum_hi = _mm256_srai_epi32(_mm256_add_epi32(sum_hi, unbias_round), 8);
        _mm256_storeu_si256((__m256i*)(dst_ptr + x),
                            _mm256_packus_epi32(sum_lo, sum_hi));
      }
      break;
    }
  }
  _mm256_zeroupper();
}
#endif
#ifdef HAS_ARGBMIRRORROW_AVX2
// Reverses the order of 4-byte pixels in a row. |width| counts pixels; each
// pass moves 8 pixels (32 bytes), reading backwards from src + width * 4 and
// writing forwards to dst.
LIBYUV_TARGET_AVX2
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
  // Dword indices 7..0: a full 8-pixel reversal across both lanes.
  const __m256i reverse_dwords = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
  const uint8_t* tail = src + width * 4;
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 8) {
    __m256i pixels;
    tail -= 32;
    pixels = _mm256_loadu_si256((const __m256i*)tail);
    _mm256_storeu_si256((__m256i*)dst,
                        _mm256_permutevar8x32_epi32(pixels, reverse_dwords));
    dst += 32;
  }
}
#endif
#ifdef HAS_J400TOARGBROW_AVX2
// Shuffle controls that copy each source byte into the first three bytes of a
// 4-byte pixel; 128 has the high bit set, which makes the shuffle write zero
// to the fourth byte. Table 0 selects source bytes 0..7, table 1 bytes 8..15.
alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_0[32] = {
    0u, 0u, 0u, 128u, 1u, 1u, 1u, 128u,
    2u, 2u, 2u, 128u, 3u, 3u, 3u, 128u,
    4u, 4u, 4u, 128u, 5u, 5u, 5u, 128u,
    6u, 6u, 6u, 128u, 7u, 7u, 7u, 128u};
alignas(32) static const uint8_t kShuffleMaskJ400ToARGB_1[32] = {
    8u,  8u,  8u,  128u, 9u,  9u,  9u,  128u,
    10u, 10u, 10u, 128u, 11u, 11u, 11u, 128u,
    12u, 12u, 12u, 128u, 13u, 13u, 13u, 128u,
    14u, 14u, 14u, 128u, 15u, 15u, 15u, 128u};

// Expands 8-bit samples to 4-byte pixels: the sample is replicated into the
// first three bytes and the fourth byte is set to 0xff. Each pass reads 16
// source bytes and writes 64 destination bytes.
LIBYUV_TARGET_AVX2
void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  const __m256i spread_first8 =
      _mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_0);
  const __m256i spread_last8 =
      _mm256_load_si256((const __m256i*)kShuffleMaskJ400ToARGB_1);
  const __m256i opaque = _mm256_set1_epi32((int)0xff000000u);
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 16) {
    // The same 16 samples in both lanes, so each lane's controls can reach
    // the bytes they need.
    const __m256i gray =
        _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i*)src_y));
    const __m256i first8 =
        _mm256_or_si256(_mm256_shuffle_epi8(gray, spread_first8), opaque);
    const __m256i last8 =
        _mm256_or_si256(_mm256_shuffle_epi8(gray, spread_last8), opaque);
    _mm256_storeu_si256((__m256i*)dst_argb, first8);
    _mm256_storeu_si256((__m256i*)(dst_argb + 32), last8);
    src_y += 16;
    dst_argb += 64;
  }
}
#endif  // HAS_J400TOARGBROW_AVX2
#ifdef HAS_RGB24TOARGBROW_AVX2
// Two 16-byte shuffle controls that spread four 3-byte groups into four
// 4-byte slots. Index 128 has the high bit set, so a byte shuffle writes zero
// to that position (the fourth byte of each slot).
//   [0] takes the groups from source bytes 0..11.
//   [1] takes the groups from source bytes 4..15.
// Both rows are read with aligned loads by RGB24ToARGBRow_AVX2.
alignas(16) static const uint8_t kShuffleMaskRGB24ToARGB[2][16] = {
{0u, 1u, 2u, 128u, 3u, 4u, 5u, 128u, 6u, 7u, 8u, 128u, 9u, 10u, 11u, 128u},
{4u, 5u, 6u, 128u, 7u, 8u, 9u, 128u, 10u, 11u, 12u, 128u, 13u, 14u, 15u,
128u}};
#endif
#ifdef HAS_RGB565TOARGBROW_AVX2
// Expands 16-bit 5:6:5 pixels to 4-byte pixels with the fourth byte set to
// 0xff. Each loop pass reads 32 source bytes (16 pixels) and writes 64
// destination bytes; the loop runs while width > 0 in steps of 16 pixels.
LIBYUV_TARGET_AVX2
void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
uint8_t* dst_argb,
int width) {
// Every 16-bit element is 0x0108: taking the high half of (v << 11) * 0x0108
// widens a 5-bit value v to 8 bits as (v << 3) | (v >> 2).
__m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108);
// Every 16-bit element is 0x2080: taking the high half of (g << 5) * 0x2080
// widens a 6-bit value g to 8 bits as (g << 2) | (g >> 4).
__m256i ymm_scale_g = _mm256_set1_epi32(0x20802080);
// NOTE: despite the name, this mask selects bits 15..11 of each pixel.
__m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800);
// Bits 10..5 of each pixel.
__m256i ymm_mask_g = _mm256_set1_epi16(0x07e0);
// 0xff in the high byte of each 16-bit element (the output's fourth byte).
__m256i ymm_mask_a = _mm256_set1_epi16((short)0xff00);
while (width > 0) {
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_rgb565);
__m256i ymm1 = ymm0;
__m256i ymm2 = ymm0;
// ymm1: bits 15..11 in place; ymm2: bits 4..0 moved up to bits 15..11.
ymm1 = _mm256_and_si256(ymm1, ymm_mask_b);
ymm2 = _mm256_slli_epi16(ymm2, 11);
ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb);
ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb);
// High byte from source bits 15..11, low byte from source bits 4..0.
ymm1 = _mm256_slli_epi16(ymm1, 8);
ymm1 = _mm256_or_si256(ymm1, ymm2); // RB
ymm0 = _mm256_and_si256(ymm0, ymm_mask_g);
ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g);
// Low byte from source bits 10..5, high byte 0xff.
ymm0 = _mm256_or_si256(ymm0, ymm_mask_a); // GA
// Byte interleave gives 4-byte pixels; unpack works per 128-bit lane, so the
// two permutes below put the halves back in pixel order.
ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0);
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0);
ymm0 = _mm256_permute2x128_si256(ymm2, ymm1, 0x20);
ymm1 = _mm256_permute2x128_si256(ymm2, ymm1, 0x31);
_mm256_storeu_si256((__m256i*)dst_argb, ymm0);
_mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1);
src_rgb565 += 32;
dst_argb += 64;
width -= 16;
}
_mm256_zeroupper();
}
#endif
#ifdef HAS_ARGB1555TOARGBROW_AVX2
// Expands 16-bit 1:5:5:5 pixels to 4-byte pixels. Each 5-bit field is widened
// to 8 bits and the top bit of the source pixel becomes 0x00 or 0xff in the
// fourth output byte. Each loop pass reads 32 source bytes (16 pixels) and
// writes 64 destination bytes.
LIBYUV_TARGET_AVX2
void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
uint8_t* dst_argb,
int width) {
// Every 16-bit element is 0x0108: the high half of (v << 11) * 0x0108 is
// (v << 3) | (v >> 2) for a 5-bit value v.
__m256i ymm_scale_rb = _mm256_set1_epi32(0x01080108);
// Every 16-bit element is 0x4200: the high half of (v << 5) * 0x4200 is
// (v << 3) | (v >> 2) for a 5-bit value v.
__m256i ymm_scale_g = _mm256_set1_epi32(0x42004200);
// NOTE: despite the name, this mask selects bits 15..11 of each element.
__m256i ymm_mask_b = _mm256_set1_epi16((short)0xf800);
// Bits 9..5 of each pixel.
__m256i ymm_mask_g = _mm256_set1_epi16(0x03e0);
// High byte of each 16-bit element.
__m256i ymm_mask_a = _mm256_set1_epi16((short)0xff00);
while (width > 0) {
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb1555);
__m256i ymm1 = ymm0;
__m256i ymm2 = ymm0;
// ymm1: source bits 14..10 moved up to bits 15..11 (the top bit is shifted
// out); ymm2: source bits 4..0 moved up to bits 15..11.
ymm1 = _mm256_slli_epi16(ymm1, 1);
ymm2 = _mm256_slli_epi16(ymm2, 11);
ymm1 = _mm256_and_si256(ymm1, ymm_mask_b);
ymm2 = _mm256_mulhi_epu16(ymm2, ymm_scale_rb);
ymm1 = _mm256_mulhi_epu16(ymm1, ymm_scale_rb);
// High byte from source bits 14..10, low byte from source bits 4..0.
ymm1 = _mm256_slli_epi16(ymm1, 8);
ymm1 = _mm256_or_si256(ymm1, ymm2); // RB
ymm2 = ymm0;
ymm0 = _mm256_and_si256(ymm0, ymm_mask_g);
// Arithmetic shift copies the top bit across the high byte, so after the
// mask the high byte is 0xff when the source top bit is set and 0 otherwise.
ymm2 = _mm256_srai_epi16(ymm2, 8);
ymm0 = _mm256_mulhi_epu16(ymm0, ymm_scale_g);
ymm2 = _mm256_and_si256(ymm2, ymm_mask_a);
ymm0 = _mm256_or_si256(ymm0, ymm2); // GA
// Byte interleave gives 4-byte pixels; unpack works per 128-bit lane, so the
// two permutes below put the halves back in pixel order.
ymm2 = _mm256_unpacklo_epi8(ymm1, ymm0);
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm0);
ymm0 = _mm256_permute2x128_si256(ymm2, ymm1, 0x20);
ymm1 = _mm256_permute2x128_si256(ymm2, ymm1, 0x31);
_mm256_storeu_si256((__m256i*)dst_argb, ymm0);
_mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1);
src_argb1555 += 32;
dst_argb += 64;
width -= 16;
}
_mm256_zeroupper();
}
#endif
#ifdef HAS_ARGB4444TOARGBROW_AVX2
// Expands pixels made of four 4-bit fields to 4-byte pixels by replicating
// each nibble into both halves of an output byte (0xN -> 0xNN). For every
// source byte the low nibble is written first, then the high nibble. Each
// loop pass reads 32 source bytes (16 pixels) and writes 64 destination bytes.
LIBYUV_TARGET_AVX2
void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
uint8_t* dst_argb,
int width) {
// 0x0f per byte selects low nibbles; shifted left by 4 it selects high ones.
__m256i ymm_mask = _mm256_set1_epi32(0x0f0f0f0f);
__m256i ymm_mask2 = _mm256_slli_epi32(ymm_mask, 4);
while (width > 0) {
__m256i ymm0 = _mm256_loadu_si256((const __m256i*)src_argb4444);
__m256i ymm2 = ymm0;
// ymm0: low nibble of every byte; ymm2: high nibble of every byte.
ymm0 = _mm256_and_si256(ymm0, ymm_mask);
ymm2 = _mm256_and_si256(ymm2, ymm_mask2);
__m256i ymm1 = ymm0;
__m256i ymm3 = ymm2;
// The 16-bit shifts cannot leak across bytes because the other nibble of
// each byte was masked to zero above.
ymm1 = _mm256_slli_epi16(ymm1, 4);
ymm3 = _mm256_srli_epi16(ymm3, 4);
ymm0 = _mm256_or_si256(ymm0, ymm1);
ymm2 = _mm256_or_si256(ymm2, ymm3);
ymm1 = ymm0;
// Byte interleave: expanded low nibble, then expanded high nibble. Unpack
// works per 128-bit lane, so the permutes restore pixel order.
ymm0 = _mm256_unpacklo_epi8(ymm0, ymm2);
ymm1 = _mm256_unpackhi_epi8(ymm1, ymm2);
ymm2 = _mm256_permute2x128_si256(ymm0, ymm1, 0x20);
ymm1 = _mm256_permute2x128_si256(ymm0, ymm1, 0x31);
_mm256_storeu_si256((__m256i*)dst_argb, ymm2);
_mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1);
src_argb4444 += 32;
dst_argb += 64;
width -= 16;
}
_mm256_zeroupper();
}
#endif
#ifdef HAS_RGB24TOARGBROW_AVX2
// Expands 3-byte pixels to 4-byte pixels, keeping the three source bytes in
// order and setting the fourth byte to 0xff. Each loop pass reads a 96-byte
// group (32 pixels) and writes 128 destination bytes.
LIBYUV_TARGET_AVX2
void RGB24ToARGBRow_AVX2(const uint8_t* src_rgb24,
uint8_t* dst_argb,
int width) {
// 0xff in the fourth byte of every 4-byte pixel.
__m256i ymm_alpha = _mm256_set1_epi32(0xff000000);
// Control [0] expands the pixels at bytes 0..11 of a lane, control [1] the
// pixels at bytes 4..15 of a lane.
__m256i ymm_shuf = _mm256_broadcastsi128_si256(
_mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[0]));
__m256i ymm_shuf2 = _mm256_broadcastsi128_si256(
_mm_load_si128((const __m128i*)kShuffleMaskRGB24ToARGB[1]));
while (width > 0) {
// Each 128-bit lane is loaded 12 bytes (four pixels) after the previous one.
__m128i xmm0 = _mm_loadu_si128((const __m128i*)src_rgb24);
__m256i ymm0 = _mm256_castsi128_si256(xmm0);
ymm0 = _mm256_inserti128_si256(
ymm0, _mm_loadu_si128((const __m128i*)(src_rgb24 + 12)), 1);
__m128i xmm1 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 24));
__m256i ymm1 = _mm256_castsi128_si256(xmm1);
ymm1 = _mm256_inserti128_si256(
ymm1, _mm_loadu_si128((const __m128i*)(src_rgb24 + 36)), 1);
__m128i xmm2 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 48));
__m256i ymm2 = _mm256_castsi128_si256(xmm2);
ymm2 = _mm256_inserti128_si256(
ymm2, _mm_loadu_si128((const __m128i*)(src_rgb24 + 60)), 1);
// The last two lanes are loaded 4 bytes early (68 and 80 instead of 72 and
// 84) and expanded with the second control, so the final 16-byte load ends
// at byte 95 and never reads past the 96-byte group.
__m128i xmm3 = _mm_loadu_si128((const __m128i*)(src_rgb24 + 68));
__m256i ymm3 = _mm256_castsi128_si256(xmm3);
ymm3 = _mm256_inserti128_si256(
ymm3, _mm_loadu_si128((const __m128i*)(src_rgb24 + 80)), 1);
ymm0 = _mm256_shuffle_epi8(ymm0, ymm_shuf);
ymm1 = _mm256_shuffle_epi8(ymm1, ymm_shuf);
ymm2 = _mm256_shuffle_epi8(ymm2, ymm_shuf);
ymm3 = _mm256_shuffle_epi8(ymm3, ymm_shuf2);
// The shuffles left the fourth byte of each pixel zero; fill it with 0xff.
ymm0 = _mm256_or_si256(ymm0, ymm_alpha);
ymm1 = _mm256_or_si256(ymm1, ymm_alpha);
ymm2 = _mm256_or_si256(ymm2, ymm_alpha);
ymm3 = _mm256_or_si256(ymm3, ymm_alpha);
_mm256_storeu_si256((__m256i*)dst_argb, ymm0);
_mm256_storeu_si256((__m256i*)(dst_argb + 32), ymm1);
_mm256_storeu_si256((__m256i*)(dst_argb + 64), ymm2);
_mm256_storeu_si256((__m256i*)(dst_argb + 96), ymm3);
src_rgb24 += 96;
dst_argb += 128;
width -= 32;
}
_mm256_zeroupper();
}
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
// Reorders bytes within each 16-byte group of the row using the 16 control
// bytes at |shuffler|. |width| counts 4-byte pixels; each pass handles 16
// pixels (64 bytes) and the loop stops once fewer than 16 pixels remain, so
// any remainder is left untouched by this function.
LIBYUV_TARGET_AVX2
void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const uint8_t* shuffler,
                         int width) {
  // Same 16 control bytes in both 128-bit lanes.
  const __m128i control128 = _mm_loadu_si128((const __m128i*)shuffler);
  const __m256i control = _mm256_broadcastsi128_si256(control128);
  for (; width >= 16; width -= 16) {
    const __m256i first =
        _mm256_shuffle_epi8(_mm256_loadu_si256((const __m256i*)src_argb),
                            control);
    const __m256i second = _mm256_shuffle_epi8(
        _mm256_loadu_si256((const __m256i*)(src_argb + 32)), control);
    _mm256_storeu_si256((__m256i*)dst_argb, first);
    _mm256_storeu_si256((__m256i*)(dst_argb + 32), second);
    src_argb += 64;
    dst_argb += 64;
  }
}
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX512BW
// 512-bit variant of the byte shuffle: reorders bytes within each 16-byte
// group using the 16 control bytes at |shuffler|. |width| counts 4-byte
// pixels; each pass handles 32 pixels (128 bytes) and the loop stops once
// fewer than 32 pixels remain.
LIBYUV_TARGET_AVX512BW
void ARGBShuffleRow_AVX512BW(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             const uint8_t* shuffler,
                             int width) {
  // Same 16 control bytes in all four 128-bit lanes.
  const __m128i control128 = _mm_loadu_si128((const __m128i*)shuffler);
  const __m512i control = _mm512_broadcast_i32x4(control128);
  for (; width >= 32; width -= 32) {
    const __m512i first = _mm512_shuffle_epi8(
        _mm512_loadu_si512((const __m512i*)src_argb), control);
    const __m512i second = _mm512_shuffle_epi8(
        _mm512_loadu_si512((const __m512i*)(src_argb + 64)), control);
    _mm512_storeu_si512((__m512i*)dst_argb, first);
    _mm512_storeu_si512((__m512i*)(dst_argb + 64), second);
    src_argb += 128;
    dst_argb += 128;
  }
}
#endif
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) ||
// defined(__i386__) || defined(_M_X64) || defined(_M_X86)) &&
// ((defined(_MSC_VER) && !defined(__clang__)) ||
// defined(LIBYUV_ENABLE_ROWWIN))
#endif // !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_X86)) && ((defined(_MSC_VER) && !defined(__clang__)) || defined(LIBYUV_ENABLE_ROWWIN))

View File

@ -11,7 +11,6 @@
#include "libyuv/scale.h"
#include <assert.h>
#include <limits.h>
#include <string.h>
#include "libyuv/cpu_id.h"
@ -40,8 +39,8 @@ static void ScalePlaneDown2(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr,
enum FilterMode filtering) {
@ -52,7 +51,7 @@ static void ScalePlaneDown2(int src_width,
? ScaleRowDown2_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_C
: ScaleRowDown2Box_C);
ptrdiff_t row_stride = src_stride * 2;
int row_stride = src_stride * 2;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -152,8 +151,8 @@ static void ScalePlaneDown2_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr,
enum FilterMode filtering) {
@ -164,7 +163,7 @@ static void ScalePlaneDown2_16(int src_width,
? ScaleRowDown2_16_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
: ScaleRowDown2Box_16_C);
ptrdiff_t row_stride = src_stride * 2;
int row_stride = src_stride * 2;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -229,7 +228,7 @@ void ScalePlaneDown2_16To8(int src_width,
? ScaleRowDown2_16To8_C
: (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
: ScaleRowDown2Box_16To8_C));
ptrdiff_t row_stride = (ptrdiff_t)src_stride * 2;
int row_stride = src_stride * 2;
(void)dst_height;
if (!filtering) {
src_ptr += src_stride; // Point to odd rows.
@ -260,8 +259,8 @@ static void ScalePlaneDown4(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr,
enum FilterMode filtering) {
@ -269,7 +268,7 @@ static void ScalePlaneDown4(int src_width,
void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
ptrdiff_t row_stride = src_stride * 4;
int row_stride = src_stride * 4;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -332,8 +331,8 @@ static void ScalePlaneDown4_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr,
enum FilterMode filtering) {
@ -341,7 +340,7 @@ static void ScalePlaneDown4_16(int src_width,
void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, int dst_width) =
filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
ptrdiff_t row_stride = src_stride * 4;
int row_stride = src_stride * 4;
(void)src_width;
(void)src_height;
if (!filtering) {
@ -376,8 +375,8 @@ static void ScalePlaneDown34(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr,
enum FilterMode filtering) {
@ -386,7 +385,7 @@ static void ScalePlaneDown34(int src_width,
uint8_t* dst_ptr, int dst_width);
void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, int dst_width);
const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void)src_width;
(void)src_height;
assert(dst_width % 3 == 0);
@ -503,8 +502,8 @@ static void ScalePlaneDown34_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr,
enum FilterMode filtering) {
@ -513,7 +512,7 @@ static void ScalePlaneDown34_16(int src_width,
uint16_t* dst_ptr, int dst_width);
void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, int dst_width);
const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void)src_width;
(void)src_height;
assert(dst_width % 3 == 0);
@ -589,8 +588,8 @@ static void ScalePlaneDown38(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr,
enum FilterMode filtering) {
@ -599,7 +598,7 @@ static void ScalePlaneDown38(int src_width,
uint8_t* dst_ptr, int dst_width);
void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, int dst_width);
const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
assert(dst_width % 3 == 0);
(void)src_width;
(void)src_height;
@ -709,8 +708,8 @@ static void ScalePlaneDown38_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr,
enum FilterMode filtering) {
@ -719,7 +718,7 @@ static void ScalePlaneDown38_16(int src_width,
uint16_t* dst_ptr, int dst_width);
void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, int dst_width);
const ptrdiff_t filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
(void)src_width;
(void)src_height;
assert(dst_width % 3 == 0);
@ -902,8 +901,8 @@ static int ScalePlaneBox(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
int j, k;
@ -968,7 +967,7 @@ static int ScalePlaneBox(int src_width,
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint8_t* src = src_ptr + iy * src_stride;
const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
y += dy;
if (y > max_y) {
y = max_y;
@ -991,8 +990,8 @@ static int ScalePlaneBox_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
int j, k;
@ -1025,7 +1024,7 @@ static int ScalePlaneBox_16(int src_width,
for (j = 0; j < dst_height; ++j) {
int boxheight;
int iy = y >> 16;
const uint16_t* src = src_ptr + iy * src_stride;
const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
y += dy;
if (y > max_y) {
y = max_y;
@ -1049,8 +1048,8 @@ static int ScalePlaneBilinearDown(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr,
enum FilterMode filtering) {
@ -1077,6 +1076,14 @@ static int ScalePlaneBilinearDown(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -1139,7 +1146,7 @@ static int ScalePlaneBilinearDown(int src_width,
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_ptr + yi * src_stride;
const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
@ -1161,8 +1168,8 @@ static int ScalePlaneBilinearDown_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr,
enum FilterMode filtering) {
@ -1189,6 +1196,14 @@ static int ScalePlaneBilinearDown_16(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(src_width, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;
@ -1230,7 +1245,7 @@ static int ScalePlaneBilinearDown_16(int src_width,
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint16_t* src = src_ptr + yi * src_stride;
const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
if (filtering == kFilterLinear) {
ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
} else {
@ -1253,8 +1268,8 @@ static int ScalePlaneBilinearUp(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr,
enum FilterMode filtering) {
@ -1275,6 +1290,14 @@ static int ScalePlaneBilinearUp(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -1340,7 +1363,7 @@ static int ScalePlaneBilinearUp(int src_width,
}
{
int yi = y >> 16;
const uint8_t* src = src_ptr + yi * src_stride;
const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
const int row_size = (dst_width + 31) & ~31;
@ -1349,7 +1372,7 @@ static int ScalePlaneBilinearUp(int src_width,
return 1;
uint8_t* rowptr = row;
ptrdiff_t rowstride = row_size;
int rowstride = row_size;
int lasty = yi;
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1367,7 +1390,7 @@ static int ScalePlaneBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_ptr + yi * src_stride;
src = src_ptr + yi * (int64_t)src_stride;
}
if (yi != lasty) {
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1402,8 +1425,8 @@ static void ScalePlaneUp2_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
@ -1446,13 +1469,13 @@ static void ScalePlaneUp2_Linear(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
@ -1467,8 +1490,8 @@ static void ScalePlaneUp2_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
@ -1533,8 +1556,8 @@ static void ScalePlaneUp2_12_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
@ -1566,13 +1589,13 @@ static void ScalePlaneUp2_12_Linear(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
@ -1588,8 +1611,8 @@ static void ScalePlaneUp2_12_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
@ -1636,8 +1659,8 @@ static void ScalePlaneUp2_16_Linear(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
@ -1669,13 +1692,13 @@ static void ScalePlaneUp2_16_Linear(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * src_stride, dst_ptr,
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_ptr + (y >> 16) * src_stride, dst_ptr, dst_width);
ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
dst_ptr += dst_stride;
y += dy;
}
@ -1686,8 +1709,8 @@ static void ScalePlaneUp2_16_Bilinear(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
@ -1734,8 +1757,8 @@ static int ScalePlaneBilinearUp_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr,
enum FilterMode filtering) {
@ -1756,6 +1779,14 @@ static int ScalePlaneBilinearUp_16(int src_width,
&dx, &dy);
src_width = Abs(src_width);
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(dst_width, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;
@ -1807,12 +1838,12 @@ static int ScalePlaneBilinearUp_16(int src_width,
}
{
int yi = y >> 16;
const uint16_t* src = src_ptr + yi * src_stride;
const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
// Allocate 2 row buffers.
const int row_size = (dst_width + 31) & ~31;
align_buffer_64(row, row_size * 4);
ptrdiff_t rowstride = row_size;
int rowstride = row_size;
int lasty = yi;
uint16_t* rowptr = (uint16_t*)row;
if (!row)
@ -1833,7 +1864,7 @@ static int ScalePlaneBilinearUp_16(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_ptr + yi * src_stride;
src = src_ptr + yi * (int64_t)src_stride;
}
if (yi != lasty) {
ScaleFilterCols(rowptr, src, dst_width, x, dx);
@ -1868,8 +1899,8 @@ static void ScalePlaneSimple(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_ptr,
uint8_t* dst_ptr) {
int i;
@ -1894,7 +1925,8 @@ static void ScalePlaneSimple(int src_width,
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}
@ -1904,8 +1936,8 @@ static void ScalePlaneSimple_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_ptr,
uint16_t* dst_ptr) {
int i;
@ -1930,7 +1962,8 @@ static void ScalePlaneSimple_16(int src_width,
}
for (i = 0; i < dst_height; ++i) {
ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
dx);
dst_ptr += dst_stride;
y += dy;
}
@ -1948,14 +1981,6 @@ int ScalePlane(const uint8_t* src,
int dst_width,
int dst_height,
enum FilterMode filtering) {
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
// This prevents FixedDiv signed integer overflows that can lead to division
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 ||
src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
@ -1963,7 +1988,7 @@ int ScalePlane(const uint8_t* src,
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (ptrdiff_t)src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
// Use specialized scales to improve performance for common resolutions.
@ -2056,14 +2081,6 @@ int ScalePlane_16(const uint16_t* src,
int dst_width,
int dst_height,
enum FilterMode filtering) {
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
// This prevents FixedDiv signed integer overflows that can lead to division
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 ||
src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
@ -2071,7 +2088,7 @@ int ScalePlane_16(const uint16_t* src,
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (ptrdiff_t)src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
// Use specialized scales to improve performance for common resolutions.
@ -2168,14 +2185,6 @@ int ScalePlane_12(const uint16_t* src,
int dst_width,
int dst_height,
enum FilterMode filtering) {
// Reject dimensions larger than 32768 (or smaller than -32768 for height).
// This prevents FixedDiv signed integer overflows that can lead to division
// by zero/overflow crashes (SIGFPE on x86) or incorrect step calculations.
if (!src || src_width <= 0 || src_height == 0 || src_width > 32768 ||
src_height < -32768 || src_height > 32768 || !dst || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
filtering);
@ -2183,7 +2192,7 @@ int ScalePlane_12(const uint16_t* src,
// Negative height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (ptrdiff_t)src_stride;
src = src + (src_height - 1) * (int64_t)src_stride;
src_stride = -src_stride;
}
@ -2224,17 +2233,17 @@ int I420Scale(const uint8_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2269,17 +2278,17 @@ int I420Scale_16(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2314,17 +2323,17 @@ int I420Scale_12(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2365,8 +2374,8 @@ int I444Scale(const uint8_t* src_y,
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
@ -2406,8 +2415,8 @@ int I444Scale_16(const uint16_t* src_y,
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
@ -2447,8 +2456,8 @@ int I444Scale_12(const uint16_t* src_y,
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
@ -2488,15 +2497,15 @@ int I422Scale(const uint8_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2531,15 +2540,15 @@ int I422Scale_16(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2574,15 +2583,15 @@ int I422Scale_12(const uint16_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int r;
if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_u || !dst_v || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2616,17 +2625,17 @@ int NV12Scale(const uint8_t* src_y,
int dst_width,
int dst_height,
enum FilterMode filtering) {
int r;
if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_uv || dst_width <= 0 ||
dst_height <= 0) {
return -1;
}
int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
int src_halfheight = SUBSAMPLE(src_height, 1, 1);
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
int r;
if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}
r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
dst_stride_y, dst_width, dst_height, filtering);
@ -2655,8 +2664,8 @@ int NV24Scale(const uint8_t* src_y,
int r;
if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
src_height == INT_MIN || !dst_y || !dst_uv || dst_width <= 0 ||
dst_height <= 0) {
src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
dst_width <= 0 || dst_height <= 0) {
return -1;
}

View File

@ -38,8 +38,8 @@ static void ScaleARGBDown2(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
@ -48,7 +48,7 @@ static void ScaleARGBDown2(int src_width,
int dy,
enum FilterMode filtering) {
int j;
ptrdiff_t row_stride = src_stride * (dy >> 16);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
uint8_t* dst_argb, int dst_width) =
filtering == kFilterNone
@ -62,9 +62,9 @@ static void ScaleARGBDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
@ -152,8 +152,8 @@ static int ScaleARGBDown4Box(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
@ -169,12 +169,12 @@ static int ScaleARGBDown4Box(int src_width,
align_buffer_64(row, row_size * 2);
if (!row)
return 1;
ptrdiff_t row_stride = src_stride * (dy >> 16);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
uint8_t* dst_argb, int dst_width) =
ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
(void)src_width;
(void)src_height;
(void)dx;
@ -226,8 +226,8 @@ static void ScaleARGBDownEven(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
@ -237,7 +237,7 @@ static void ScaleARGBDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
ptrdiff_t row_stride = (dy >> 16) * src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
int src_step, uint8_t* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
@ -245,7 +245,7 @@ static void ScaleARGBDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
src_argb += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
@ -302,8 +302,8 @@ static int ScaleARGBBilinearDown(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
@ -331,6 +331,14 @@ static int ScaleARGBBilinearDown(int src_width,
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -405,7 +413,7 @@ static int ScaleARGBBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_argb + yi * src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
@ -429,8 +437,8 @@ static int ScaleARGBBilinearUp(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
@ -446,6 +454,14 @@ static int ScaleARGBBilinearUp(int src_width,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -546,7 +562,7 @@ static int ScaleARGBBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_argb + yi * src_stride;
const uint8_t* src = src_argb + yi * (intptr_t)src_stride;
// Allocate 2 rows of ARGB.
const int row_size = (dst_width * 4 + 31) & ~31;
@ -555,7 +571,7 @@ static int ScaleARGBBilinearUp(int src_width,
return 1;
uint8_t* rowptr = row;
ptrdiff_t rowstride = row_size;
int rowstride = row_size;
int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -573,7 +589,7 @@ static int ScaleARGBBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * src_stride;
src = src_argb + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
@ -599,6 +615,283 @@ static int ScaleARGBBilinearUp(int src_width,
return 0;
}
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
//
// Source rows are converted from planar YUV to ARGB with I422ToARGBRow into a
// scratch row, scaled horizontally into one of two destination-width row
// buffers, and then blended vertically with InterpolateRow.
//
// src_width, src_height: dimensions of the source Y plane.
// dst_width, dst_height: dimensions of the ARGB destination.
// src_stride_y/u/v:      byte strides of the source planes.
// dst_stride_argb:       byte stride of the destination.
// x, dx:                 16.16 fixed point horizontal start and step.
// y, dy:                 16.16 fixed point vertical start and step.
// filtering:             kFilterLinear interpolates horizontally only; other
//                        non-zero modes also blend the two buffered rows.
//
// Chroma rows are indexed at half the luma row (kYShift == 1).
// Returns 0 on success, 1 if the temporary row buffer cannot be allocated.
static int ScaleYUVToARGBBilinearUp(int src_width,
                                    int src_height,
                                    int dst_width,
                                    int dst_height,
                                    int src_stride_y,
                                    int src_stride_u,
                                    int src_stride_v,
                                    int dst_stride_argb,
                                    const uint8_t* src_y,
                                    const uint8_t* src_u,
                                    const uint8_t* src_v,
                                    uint8_t* dst_argb,
                                    int x,
                                    int dx,
                                    int y,
                                    int dy,
                                    enum FilterMode filtering) {
  int j;
  // Row function that converts one row of YUV to ARGB.  It is always called
  // with src_width pixels, so alignment checks below test src_width.
  void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
                        const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
      I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX512BW)
  if (TestCpuFlag(kCpuHasAVX512BW | kCpuHasAVX512VL) ==
      (kCpuHasAVX512BW | kCpuHasAVX512VL)) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX512BW;
    if (IS_ALIGNED(src_width, 32)) {
      I422ToARGBRow = I422ToARGBRow_AVX512BW;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_SVE2)
  if (TestCpuFlag(kCpuHasSVE2)) {
    I422ToARGBRow = I422ToARGBRow_SVE2;
  }
#endif
#if defined(HAS_I422TOARGBROW_SME)
  if (TestCpuFlag(kCpuHasSME)) {
    I422ToARGBRow = I422ToARGBRow_SME;
  }
#endif
#if defined(HAS_I422TOARGBROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    I422ToARGBRow = I422ToARGBRow_Any_LSX;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_LSX;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_LASX)
  if (TestCpuFlag(kCpuHasLASX)) {
    I422ToARGBRow = I422ToARGBRow_Any_LASX;
    if (IS_ALIGNED(src_width, 32)) {
      I422ToARGBRow = I422ToARGBRow_LASX;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    I422ToARGBRow = I422ToARGBRow_RVV;
  }
#endif

  // Row function that blends two scaled ARGB rows vertically.  It is called
  // with a width of dst_width * 4 bytes.
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SME)
  if (TestCpuFlag(kCpuHasSME)) {
    InterpolateRow = InterpolateRow_SME;
  }
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    InterpolateRow = InterpolateRow_Any_LSX;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_LSX;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    InterpolateRow = InterpolateRow_RVV;
  }
#endif

  // Row function that scales one ARGB row horizontally.  Despite the name it
  // also holds the unfiltered (point sampling) variant when filtering is off.
  void (*ScaleARGBFilterCols)(uint8_t* dst_argb, const uint8_t* src_argb,
                              int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  // Wide sources use the 64 bit column functions.
  if (src_width >= 32768) {
    ScaleARGBFilterCols =
        filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_LSX)
  if (filtering && TestCpuFlag(kCpuHasLSX)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_LSX;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_LSX;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_RVV)
  if (filtering && TestCpuFlag(kCpuHasRVV)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_RVV;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_LSX)
  if (!filtering && TestCpuFlag(kCpuHasLSX)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_LSX;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBCols_LSX;
    }
  }
#endif
  // Exact 2x horizontal upsample without filtering has a dedicated path.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  // Clamp the starting row to the last source row.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8_t* src_row_y = src_y + yi * (intptr_t)src_stride_y;
  const uint8_t* src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
  const uint8_t* src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;

  // Allocate 1 row of ARGB for source conversion and 2 rows of ARGB
  // scaled horizontally to the destination width.
  const int row_size = (dst_width * 4 + 31) & ~31;
  align_buffer_64(row, row_size * 2 + src_width * 4);
  // Check the allocation before deriving any pointers from it.
  if (!row)
    return 1;
  uint8_t* argb_row = row + row_size * 2;  // Scratch row after the 2 buffers.
  uint8_t* rowptr = row;
  int rowstride = row_size;
  int lasty = yi;

  // Prime the two row buffers.  The column scaler consumes ARGB pixels, so
  // each YUV row is first converted into argb_row; passing the Y plane
  // directly would read 4 bytes per pixel from a 1 byte per pixel row.
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    // Leaving an odd luma row moves to the next chroma row.
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
  ScaleARGBFilterCols(rowptr + rowstride, argb_row, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    // The row just consumed was yi + 1, which is odd when yi is even.
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        // Stepped past the last row: clamp and reseat the source pointers.
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * (intptr_t)src_stride_y;
        src_row_u = src_u + uv_yi * (intptr_t)src_stride_u;
        src_row_v = src_v + uv_yi * (intptr_t)src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        // Swap which buffer is "current" by flipping the stride sign.
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        // NOTE(review): the priming code above advances chroma based on the
        // parity of the row being left; this test uses the new yi instead.
        // Confirm the intended parity before enabling YUVSCALEUP.
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: copy the current row without blending.
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;  // Top 8 bits of the vertical fraction.
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  return 0;
}
#endif
// Scale ARGB to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
@ -608,8 +901,8 @@ static void ScaleARGBSimple(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_argb,
uint8_t* dst_argb,
int x,
@ -652,8 +945,8 @@ static void ScaleARGBSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
dx);
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * (intptr_t)src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
}
@ -688,7 +981,7 @@ static int ScaleARGB(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (ptrdiff_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -703,8 +996,8 @@ static int ScaleARGB(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (ptrdiff_t)src_stride;
dst += clip_y * (ptrdiff_t)dst_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
// Special case for integer step values.
@ -737,7 +1030,7 @@ static int ScaleARGB(const uint8_t* src,
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 4,
ARGBCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 4,
src_stride, dst, dst_stride, clip_width, clip_height);
return 0;
}
@ -779,9 +1072,9 @@ int ARGBScaleClip(const uint8_t* src_argb,
int clip_width,
int clip_height,
enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 || src_height == INT_MIN ||
!dst_argb || dst_width <= 0 || dst_height <= 0 || clip_x < 0 ||
clip_y < 0 || clip_width > 32768 || clip_height > 32768 ||
if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
clip_width > 32768 || clip_height > 32768 ||
(clip_x + clip_width) > dst_width ||
(clip_y + clip_height) > dst_height) {
return -1;
@ -802,9 +1095,8 @@ int ARGBScale(const uint8_t* src_argb,
int dst_width,
int dst_height,
enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 || src_height == INT_MIN ||
src_width > 32768 || src_height > 32768 || !dst_argb || dst_width <= 0 ||
dst_height <= 0) {
if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
return -1;
}
return ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
@ -836,13 +1128,12 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
int r;
(void)src_fourcc; // TODO(fbarchard): implement and/or assert.
(void)dst_fourcc;
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
if (!src_y || !src_u || !src_v || !dst_argb || src_width <= 0 ||
src_width > INT_MAX / 4 || src_height == 0 || src_height == INT_MIN ||
dst_width <= 0 || dst_height <= 0 || clip_width <= 0 ||
clip_height <= 0) {
src_width > INT_MAX / 4 || src_height == 0 || dst_width <= 0 ||
dst_height <= 0 || clip_width <= 0 || clip_height <= 0) {
return -1;
}
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
const uint64_t argb_buffer_size = (uint64_t)src_width * abs_src_height * 4;
if (argb_buffer_size > SIZE_MAX) {
return -1; // Invalid size.

View File

@ -792,10 +792,10 @@ void ScaleFilterCols64_C(uint8_t* dst_ptr,
#undef BLENDER
// Same as 8 bit arm blender but return is cast to uint16_t
// The product f * (b - a) is formed in int64_t; 0x8000 (half of 1 << 16) is
// added before the >> 16 so the shift rounds to nearest instead of truncating.
// The delta is then added to a as int and the total is cast to uint16_t.
// Both definitions below expand to the same token sequence and differ only in
// line layout.
#define BLENDER(a, b, f) \
(uint16_t)((int)(a) + \
(int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> \
16))
#define BLENDER(a, b, f) \
(uint16_t)( \
(int)(a) + \
(int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))
void ScaleFilterCols_16_C(uint16_t* dst_ptr,
const uint16_t* src_ptr,
@ -1196,7 +1196,7 @@ void ScaleARGBColsUp2_C(uint8_t* dst_argb,
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
// Weighted sum of a and b: a is scaled by (0x7f ^ f), b by f, and the sum is
// shifted right by 7.
// NOTE(review): `f` is substituted without parentheses and the expansion ends
// in a bare `>> 7`, so the result depends on `f` being a simple operand and on
// the precedence of whatever operators surround the macro at the use site.
// The two definitions below differ only in spacing around `*`.
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7
#define BLENDERC(a, b, f, s) \
(uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) \
@ -1636,6 +1636,14 @@ void ScalePlaneVertical(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width_bytes, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -1710,6 +1718,14 @@ void ScalePlaneVertical_16(int src_height,
assert(dst_width > 0);
assert(dst_height > 0);
src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
if (TestCpuFlag(kCpuHasSSE2)) {
InterpolateRow = InterpolateRow_16_Any_SSE2;
if (IS_ALIGNED(dst_width_words, 16)) {
InterpolateRow = InterpolateRow_16_SSE2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_16_Any_SSSE3;

View File

@ -1759,25 +1759,25 @@ void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
void ScaleAddRow_SSE2(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width) {
asm volatile("pxor %%xmm5,%%xmm5 \n"
asm volatile("pxor %%xmm5,%%xmm5 \n"
// 16 pixel loop.
LABELALIGN
"1: \n"
"movdqu (%0),%%xmm3 \n"
"lea 0x10(%0),%0 \n" // src_ptr += 16
"movdqu (%1),%%xmm0 \n"
"movdqu 0x10(%1),%%xmm1 \n"
"movdqa %%xmm3,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"punpckhbw %%xmm5,%%xmm3 \n"
"paddusw %%xmm2,%%xmm0 \n"
"paddusw %%xmm3,%%xmm1 \n"
"movdqu %%xmm0,(%1) \n"
"movdqu %%xmm1,0x10(%1) \n"
"lea 0x20(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
"1: \n"
"movdqu (%0),%%xmm3 \n"
"lea 0x10(%0),%0 \n" // src_ptr += 16
"movdqu (%1),%%xmm0 \n"
"movdqu 0x10(%1),%%xmm1 \n"
"movdqa %%xmm3,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm2 \n"
"punpckhbw %%xmm5,%%xmm3 \n"
"paddusw %%xmm2,%%xmm0 \n"
"paddusw %%xmm3,%%xmm1 \n"
"movdqu %%xmm0,(%1) \n"
"movdqu %%xmm1,0x10(%1) \n"
"lea 0x20(%1),%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(src_width) // %2
@ -1790,23 +1790,23 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr,
void ScaleAddRow_AVX2(const uint8_t* src_ptr,
uint16_t* dst_ptr,
int src_width) {
asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm3 \n"
"lea 0x20(%0),%0 \n" // src_ptr += 32
"vpermq $0xd8,%%ymm3,%%ymm3 \n"
"vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
"vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
"vpaddusw (%1),%%ymm2,%%ymm0 \n"
"vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n"
"vmovdqu %%ymm0,(%1) \n"
"vmovdqu %%ymm1,0x20(%1) \n"
"lea 0x40(%1),%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
"1: \n"
"vmovdqu (%0),%%ymm3 \n"
"lea 0x20(%0),%0 \n" // src_ptr += 32
"vpermq $0xd8,%%ymm3,%%ymm3 \n"
"vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
"vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
"vpaddusw (%1),%%ymm2,%%ymm0 \n"
"vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n"
"vmovdqu %%ymm0,(%1) \n"
"vmovdqu %%ymm1,0x20(%1) \n"
"lea 0x40(%1),%1 \n"
"sub $0x20,%2 \n"
"jg 1b \n"
"vzeroupper \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(src_width) // %2

View File

@ -42,8 +42,8 @@ int RGBScale(const uint8_t* src_rgb,
enum FilterMode filtering) {
int r;
if (!src_rgb || !dst_rgb || src_width <= 0 || src_width > INT_MAX / 4 ||
src_height == 0 || src_height == INT_MIN || dst_width <= 0 ||
dst_width > INT_MAX / 4 || dst_height <= 0) {
src_height == 0 || dst_width <= 0 || dst_width > INT_MAX / 4 ||
dst_height <= 0) {
return -1;
}
const int abs_src_height = (src_height < 0) ? -src_height : src_height;

View File

@ -11,7 +11,6 @@
#include "libyuv/scale_uv.h"
#include <assert.h>
#include <limits.h>
#include <string.h>
#include "libyuv/cpu_id.h"
@ -60,8 +59,8 @@ static void ScaleUVDown2(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv,
int x,
@ -70,7 +69,7 @@ static void ScaleUVDown2(int src_width,
int dy,
enum FilterMode filtering) {
int j;
ptrdiff_t row_stride = src_stride * (dy >> 16);
int row_stride = src_stride * (dy >> 16);
void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
uint8_t* dst_uv, int dst_width) =
filtering == kFilterNone
@ -84,9 +83,9 @@ static void ScaleUVDown2(int src_width,
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
} else {
src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + ((x >> 16) - 1) * 2;
}
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
@ -175,8 +174,8 @@ static int ScaleUVDown4Box(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv,
int x,
@ -189,12 +188,12 @@ static int ScaleUVDown4Box(int src_width,
align_buffer_64(row, row_size * 2);
if (!row)
return 1;
ptrdiff_t row_stride = src_stride * (dy >> 16);
int row_stride = src_stride * (dy >> 16);
void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
uint8_t* dst_uv, int dst_width) =
ScaleUVRowDown2Box_C;
// Advance to odd row, even column.
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
(void)src_width;
(void)src_height;
(void)dx;
@ -257,8 +256,8 @@ static void ScaleUVDownEven(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv,
int x,
@ -268,7 +267,7 @@ static void ScaleUVDownEven(int src_width,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
ptrdiff_t row_stride = (dy >> 16) * src_stride;
ptrdiff_t row_stride = (ptrdiff_t)((dy >> 16) * (intptr_t)src_stride);
void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
int src_step, uint8_t* dst_uv, int dst_width) =
filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
@ -276,7 +275,7 @@ static void ScaleUVDownEven(int src_width,
(void)src_height;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
src_uv += (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
@ -335,8 +334,8 @@ static int ScaleUVBilinearDown(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv,
int x,
@ -364,6 +363,14 @@ static int ScaleUVBilinearDown(int src_width,
clip_src_width = (int)(xr - xl) * 2; // Width aligned to 2.
src_uv += xl * 2;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -423,7 +430,7 @@ static int ScaleUVBilinearDown(int src_width,
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8_t* src = src_uv + yi * src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
if (filtering == kFilterLinear) {
ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
} else {
@ -449,8 +456,8 @@ static int ScaleUVBilinearUp(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv,
int x,
@ -466,6 +473,14 @@ static int ScaleUVBilinearUp(int src_width,
int dst_width, int x, int dx) =
filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
InterpolateRow = InterpolateRow_Any_AVX2;
@ -544,7 +559,7 @@ static int ScaleUVBilinearUp(int src_width,
{
int yi = y >> 16;
const uint8_t* src = src_uv + yi * src_stride;
const uint8_t* src = src_uv + yi * (intptr_t)src_stride;
// Allocate 2 rows of UV.
const int row_size = (dst_width * 2 + 15) & ~15;
@ -553,7 +568,7 @@ static int ScaleUVBilinearUp(int src_width,
return 1;
uint8_t* rowptr = row;
ptrdiff_t rowstride = row_size;
int rowstride = row_size;
int lasty = yi;
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -571,7 +586,7 @@ static int ScaleUVBilinearUp(int src_width,
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_uv + yi * src_stride;
src = src_uv + yi * (intptr_t)src_stride;
}
if (yi != lasty) {
ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
@ -607,8 +622,8 @@ static void ScaleUVLinearUp2(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv) {
void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
@ -646,12 +661,13 @@ static void ScaleUVLinearUp2(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -727,8 +743,8 @@ static void ScaleUVLinearUp2_16(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint16_t* src_uv,
uint16_t* dst_uv) {
void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
@ -760,12 +776,13 @@ static void ScaleUVLinearUp2_16(int src_width,
#endif
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
} else {
dy = FixedDiv(src_height - 1, dst_height - 1);
y = (1 << 15) - 1;
for (i = 0; i < dst_height; ++i) {
ScaleRowUp(src_uv + (y >> 16) * src_stride, dst_uv, dst_width);
ScaleRowUp(src_uv + (y >> 16) * (intptr_t)src_stride, dst_uv, dst_width);
dst_uv += dst_stride;
y += dy;
}
@ -835,8 +852,8 @@ static void ScaleUVSimple(int src_width,
int src_height,
int dst_width,
int dst_height,
ptrdiff_t src_stride,
ptrdiff_t dst_stride,
int src_stride,
int dst_stride,
const uint8_t* src_uv,
uint8_t* dst_uv,
int x,
@ -871,7 +888,8 @@ static void ScaleUVSimple(int src_width,
}
for (j = 0; j < dst_height; ++j) {
ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
ScaleUVCols(dst_uv, src_uv + (y >> 16) * (intptr_t)src_stride, dst_width, x,
dx);
dst_uv += dst_stride;
y += dy;
}
@ -885,13 +903,13 @@ static int UVCopy(const uint8_t* src_uv,
int dst_stride_uv,
int width,
int height) {
if (!src_uv || !dst_uv || width <= 0 || height == 0 || height == INT_MIN) {
if (!src_uv || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -905,13 +923,13 @@ static int UVCopy_16(const uint16_t* src_uv,
int dst_stride_uv,
int width,
int height) {
if (!src_uv || !dst_uv || width <= 0 || height == 0 || height == INT_MIN) {
if (!src_uv || !dst_uv || width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uv = src_uv + (height - 1) * (ptrdiff_t)src_stride_uv;
src_uv = src_uv + (height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
@ -949,7 +967,7 @@ static int ScaleUV(const uint8_t* src,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * (ptrdiff_t)src_stride;
src = src + (src_height - 1) * (intptr_t)src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
@ -964,8 +982,8 @@ static int ScaleUV(const uint8_t* src,
if (clip_y) {
int64_t clipf = (int64_t)(clip_y)*dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * (ptrdiff_t)src_stride;
dst += clip_y * (ptrdiff_t)dst_stride;
src += (clipf >> 16) * (intptr_t)src_stride;
dst += clip_y * dst_stride;
}
// Special case for integer step values.
@ -1005,8 +1023,9 @@ static int ScaleUV(const uint8_t* src,
#ifdef HAS_UVCOPY
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
return UVCopy(src + (y >> 16) * (ptrdiff_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
UVCopy(src + (y >> 16) * (intptr_t)src_stride + (x >> 16) * 2,
src_stride, dst, dst_stride, clip_width, clip_height);
return 0;
}
#endif
}
@ -1062,8 +1081,7 @@ int UVScale(const uint8_t* src_uv,
int dst_height,
enum FilterMode filtering) {
if (!src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 ||
src_height < -32768 || src_height > 32768 || !dst_uv || dst_width <= 0 ||
dst_height <= 0) {
src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
return -1;
}
return ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv,
@ -1085,9 +1103,8 @@ int UVScale_16(const uint16_t* src_uv,
enum FilterMode filtering) {
int dy = 0;
if (!src_uv || src_width <= 0 || src_height == 0 || src_height == INT_MIN ||
src_width > 32768 || src_height > 32768 || !dst_uv || dst_width <= 0 ||
dst_height <= 0) {
if (!src_uv || src_width <= 0 || src_height == 0 || src_width > 32768 ||
src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
return -1;
}
@ -1099,7 +1116,7 @@ int UVScale_16(const uint16_t* src_uv,
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src_uv = src_uv + (src_height - 1) * (ptrdiff_t)src_stride_uv;
src_uv = src_uv + (src_height - 1) * (intptr_t)src_stride_uv;
src_stride_uv = -src_stride_uv;
}
src_width = Abs(src_width);
@ -1107,17 +1124,16 @@ int UVScale_16(const uint16_t* src_uv,
#ifdef HAS_UVCOPY
if (!filtering && src_width == dst_width && (src_height % dst_height == 0)) {
if (dst_height == 1) {
return UVCopy_16(
src_uv + ((src_height - 1) / 2) * (ptrdiff_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
UVCopy_16(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride_uv,
src_stride_uv, dst_uv, dst_stride_uv, dst_width, dst_height);
} else {
dy = src_height / dst_height;
UVCopy_16(src_uv + ((dy - 1) / 2) * (intptr_t)src_stride_uv,
(int)(dy * (intptr_t)src_stride_uv), dst_uv, dst_stride_uv,
dst_width, dst_height);
}
dy = src_height / dst_height;
if (src_stride_uv > INT_MAX / dy) {
return -1;
}
return UVCopy_16(src_uv + ((dy - 1) / 2) * (ptrdiff_t)src_stride_uv,
dy * src_stride_uv, dst_uv, dst_stride_uv, dst_width,
dst_height);
return 0;
}
#endif

View File

@ -104,7 +104,7 @@ __declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr,
movdqu xmm0, [eax]
movdqu xmm1, [eax + 16]
lea eax, [eax + 32]
psrlw xmm0, 8 // isolate odd pixels.
psrlw xmm0, 8 // isolate odd pixels.
psrlw xmm1, 8
packuswb xmm0, xmm1
movdqu [edx], xmm0
@ -138,7 +138,7 @@ __declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr,
lea eax, [eax + 32]
pmaddubsw xmm0, xmm4 // horizontal add
pmaddubsw xmm1, xmm4
pavgw xmm0, xmm5 // (x + 1) / 2
pavgw xmm0, xmm5 // (x + 1) / 2
pavgw xmm1, xmm5
packuswb xmm0, xmm1
movdqu [edx], xmm0
@ -213,7 +213,7 @@ __declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr,
vpsrlw ymm0, ymm0, 8 // isolate odd pixels.
vpsrlw ymm1, ymm1, 8
vpackuswb ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vmovdqu [edx], ymm0
lea edx, [edx + 32]
sub ecx, 32
@ -249,7 +249,7 @@ __declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr,
vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2
vpavgw ymm1, ymm1, ymm5
vpackuswb ymm0, ymm0, ymm1
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vmovdqu [edx], ymm0
lea edx, [edx + 32]
sub ecx, 32
@ -319,7 +319,7 @@ __declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr,
// src_stride ignored
mov edx, [esp + 12] // dst_ptr
mov ecx, [esp + 16] // dst_width
pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000
pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000
psrld xmm5, 24
pslld xmm5, 16
@ -424,7 +424,7 @@ __declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr,
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vpsrlw ymm0, ymm0, 8
vpackuswb ymm0, ymm0, ymm0
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb
vmovdqu [edx], xmm0
lea edx, [edx + 16]
sub ecx, 16
@ -687,7 +687,7 @@ __declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr,
pshufb xmm1, xmm5
paddusb xmm0, xmm1
movq qword ptr [edx], xmm0 // write 12 pixels
movq qword ptr [edx], xmm0 // write 12 pixels
movhlps xmm1, xmm0
movd [edx + 8], xmm1
lea edx, [edx + 12]
@ -1030,7 +1030,7 @@ __declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb,
lea eax, [eax + 32]
movdqa xmm2, xmm0
shufps xmm0, xmm1, 0x88 // even pixels
shufps xmm2, xmm1, 0xdd // odd pixels
shufps xmm2, xmm1, 0xdd // odd pixels
pavgb xmm0, xmm2
movdqu [edx], xmm0
lea edx, [edx + 16]
@ -1216,7 +1216,7 @@ __declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb,
test ecx, 2
je xloop29
// 2 Pixels.
// 2 Pixels.
movd xmm0, [esi + eax * 4] // 1 source x0 pixels
movd xmm1, [esi + edx * 4] // 1 source x1 pixels
pextrw eax, xmm2, 5 // get x2 integer.
@ -1229,7 +1229,7 @@ __declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb,
test ecx, 1
je xloop99
// 1 Pixels.
// 1 Pixels.
movd xmm0, [esi + eax * 4] // 1 source x2 pixels
movd dword ptr [edi], xmm0
xloop99:

View File

@ -22,22 +22,22 @@ TEST_F(LibYUVBaseTest, SizeOfTypes) {
uint32_t u32 = 1u;
int64_t i64 = -1;
uint64_t u64 = 1u;
ASSERT_EQ(1u, sizeof(i8));
ASSERT_EQ(1u, sizeof(u8));
ASSERT_EQ(2u, sizeof(i16));
ASSERT_EQ(2u, sizeof(u16));
ASSERT_EQ(4u, sizeof(i32));
ASSERT_EQ(4u, sizeof(u32));
ASSERT_EQ(8u, sizeof(i64));
ASSERT_EQ(8u, sizeof(u64));
ASSERT_GT(0, i8);
ASSERT_LT(0u, u8);
ASSERT_GT(0, i16);
ASSERT_LT(0u, u16);
ASSERT_GT(0, i32);
ASSERT_LT(0u, u32);
ASSERT_GT(0, i64);
ASSERT_LT(0u, u64);
EXPECT_EQ(1u, sizeof(i8));
EXPECT_EQ(1u, sizeof(u8));
EXPECT_EQ(2u, sizeof(i16));
EXPECT_EQ(2u, sizeof(u16));
EXPECT_EQ(4u, sizeof(i32));
EXPECT_EQ(4u, sizeof(u32));
EXPECT_EQ(8u, sizeof(i64));
EXPECT_EQ(8u, sizeof(u64));
EXPECT_GT(0, i8);
EXPECT_LT(0u, u8);
EXPECT_GT(0, i16);
EXPECT_LT(0u, u16);
EXPECT_GT(0, i32);
EXPECT_LT(0u, u32);
EXPECT_GT(0, i64);
EXPECT_LT(0u, u64);
}
} // namespace libyuv

View File

@ -22,8 +22,14 @@ namespace libyuv {
// TODO(fbarchard): clang x86 has a higher accuracy YUV to RGB.
// Port to Visual C and other CPUs
// Selects ERROR_FULL / ERROR_J420 by build configuration: 32- or 64-bit x86
// targets built without LIBYUV_BIT_EXACT and without LIBYUV_DISABLE_X86 get
// the smaller values (5 and 4); every other configuration gets 6 for both.
// NOTE(review): presumably these are per-channel error tolerances; their uses
// are not visible in this part of the file -- confirm.
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || defined(__i386__))
#define ERROR_FULL 5
#define ERROR_J420 4
#else
#define ERROR_FULL 6
#define ERROR_J420 6
#endif
#define ERROR_R 1
#define ERROR_G 1
#ifdef LIBYUV_UNLIMITED_DATA
@ -113,11 +119,11 @@ namespace libyuv {
} \
/* Test C and SIMD match. */ \
for (int i = 0; i < kPixels * 4; ++i) { \
ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]); \
} \
/* Test SIMD is close to original. */ \
for (int i = 0; i < kPixels * 4; ++i) { \
ASSERT_NEAR(static_cast<int>(orig_pixels[i]), \
EXPECT_NEAR(static_cast<int>(orig_pixels[i]), \
static_cast<int>(dst_pixels_opt[i]), DIFF); \
} \
\
@ -425,16 +431,15 @@ TEST_F(LibYUVColorTest, TestRoundToByte) {
allb |= b;
}
}
ASSERT_GE(allb, 0);
ASSERT_LE(allb, 255);
EXPECT_GE(allb, 0);
EXPECT_LE(allb, 255);
}
// BT.601 limited range YUV to RGB reference
static void YUVToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
// Inputs are offset before scaling: 16 is subtracted from y and 128 from u
// and v. Each channel sum is computed in double, handed to RoundToByte and
// stored through r / g / b.
// The luma term (y - 16) * 1.164 appears both hoisted into y1 and spelled out
// inline in the second group of stores; both groups use the same chroma
// coefficients (1.596 for v->r, 0.391 / 0.813 for u,v->g, 2.018 for u->b).
double y1 = (y - 16) * 1.164;
*r = RoundToByte(y1 - (v - 128) * -1.596);
*g = RoundToByte(y1 - (u - 128) * 0.391 - (v - 128) * 0.813);
*b = RoundToByte(y1 - (u - 128) * -2.018);
*r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.596);
*g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.391 - (v - 128) * 0.813);
*b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.018);
}
// BT.601 full range YUV to RGB reference (aka JPEG)
@ -447,10 +452,9 @@ static void YUVJToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
// BT.709 limited range YUV to RGB reference
// See also http://www.equasys.de/colorconversion.html
static void YUVHToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
// Same structure as the BT.601 limited-range reference: y is offset by 16 and
// scaled by 1.164, u and v are offset by 128, and each channel sum is passed
// to RoundToByte before being stored through r / g / b.
// The luma term appears both hoisted into y1 and spelled out inline in the
// second group of stores; both groups use the same chroma coefficients
// (1.793 for v->r, 0.213 / 0.533 for u,v->g, 2.112 for u->b).
double y1 = (y - 16) * 1.164;
*r = RoundToByte(y1 - (v - 128) * -1.793);
*g = RoundToByte(y1 - (u - 128) * 0.213 - (v - 128) * 0.533);
*b = RoundToByte(y1 - (u - 128) * -2.112);
*r = RoundToByte((y - 16) * 1.164 - (v - 128) * -1.793);
*g = RoundToByte((y - 16) * 1.164 - (u - 128) * 0.213 - (v - 128) * 0.533);
*b = RoundToByte((y - 16) * 1.164 - (u - 128) * -2.112);
}
// BT.709 full range YUV to RGB reference
@ -462,10 +466,10 @@ static void YUVFToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
// BT.2020 limited range YUV to RGB reference
static void YUVUToRGBReference(int y, int u, int v, int* r, int* g, int* b) {
// y is offset by 16 and scaled by 1.164384; u and v are offset by 128. Each
// channel sum is computed in double, passed to RoundToByte and stored through
// r / g / b.
// The luma term appears both hoisted into y1 and spelled out inline in the
// second group of stores; both groups use the same chroma coefficients
// (1.67867 for v->r, 0.187326 / 0.65042 for u,v->g, 2.14177 for u->b).
double y1 = (y - 16) * 1.164384;
*r = RoundToByte(y1 - (v - 128) * -1.67867);
*g = RoundToByte(y1 - (u - 128) * 0.187326 - (v - 128) * 0.65042);
*b = RoundToByte(y1 - (u - 128) * -2.14177);
*r = RoundToByte((y - 16) * 1.164384 - (v - 128) * -1.67867);
*g = RoundToByte((y - 16) * 1.164384 - (u - 128) * 0.187326 -
(v - 128) * 0.65042);
*b = RoundToByte((y - 16) * 1.164384 - (u - 128) * -2.14177);
}
// BT.2020 full range YUV to RGB reference
@ -480,48 +484,48 @@ TEST_F(LibYUVColorTest, TestYUV) {
// cyan (less red)
YUVToRGBReference(240, 255, 0, &r0, &g0, &b0);
ASSERT_EQ(56, r0);
ASSERT_EQ(255, g0);
ASSERT_EQ(255, b0);
EXPECT_EQ(56, r0);
EXPECT_EQ(255, g0);
EXPECT_EQ(255, b0);
YUVToRGB(240, 255, 0, &r1, &g1, &b1);
ASSERT_EQ(57, r1);
ASSERT_EQ(255, g1);
ASSERT_EQ(255, b1);
EXPECT_EQ(57, r1);
EXPECT_EQ(255, g1);
EXPECT_EQ(255, b1);
// green (less red and blue)
YUVToRGBReference(240, 0, 0, &r0, &g0, &b0);
ASSERT_EQ(56, r0);
ASSERT_EQ(255, g0);
ASSERT_EQ(2, b0);
EXPECT_EQ(56, r0);
EXPECT_EQ(255, g0);
EXPECT_EQ(2, b0);
YUVToRGB(240, 0, 0, &r1, &g1, &b1);
ASSERT_EQ(57, r1);
ASSERT_EQ(255, g1);
EXPECT_EQ(57, r1);
EXPECT_EQ(255, g1);
#ifdef LIBYUV_UNLIMITED_DATA
ASSERT_EQ(3, b1);
EXPECT_EQ(3, b1);
#else
ASSERT_EQ(5, b1);
EXPECT_EQ(5, b1);
#endif
for (int i = 0; i < 256; ++i) {
YUVToRGBReference(i, 128, 128, &r0, &g0, &b0);
YUVToRGB(i, 128, 128, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
YUVToRGBReference(i, 0, 0, &r0, &g0, &b0);
YUVToRGB(i, 0, 0, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
YUVToRGBReference(i, 0, 255, &r0, &g0, &b0);
YUVToRGB(i, 0, 255, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
}
}
@ -530,47 +534,47 @@ TEST_F(LibYUVColorTest, TestGreyYUV) {
// black
YUVToRGBReference(16, 128, 128, &r0, &g0, &b0);
ASSERT_EQ(0, r0);
ASSERT_EQ(0, g0);
ASSERT_EQ(0, b0);
EXPECT_EQ(0, r0);
EXPECT_EQ(0, g0);
EXPECT_EQ(0, b0);
YUVToRGB(16, 128, 128, &r1, &g1, &b1);
ASSERT_EQ(0, r1);
ASSERT_EQ(0, g1);
ASSERT_EQ(0, b1);
EXPECT_EQ(0, r1);
EXPECT_EQ(0, g1);
EXPECT_EQ(0, b1);
// white
YUVToRGBReference(240, 128, 128, &r0, &g0, &b0);
ASSERT_EQ(255, r0);
ASSERT_EQ(255, g0);
ASSERT_EQ(255, b0);
EXPECT_EQ(255, r0);
EXPECT_EQ(255, g0);
EXPECT_EQ(255, b0);
YUVToRGB(240, 128, 128, &r1, &g1, &b1);
ASSERT_EQ(255, r1);
ASSERT_EQ(255, g1);
ASSERT_EQ(255, b1);
EXPECT_EQ(255, r1);
EXPECT_EQ(255, g1);
EXPECT_EQ(255, b1);
// grey
YUVToRGBReference(128, 128, 128, &r0, &g0, &b0);
ASSERT_EQ(130, r0);
ASSERT_EQ(130, g0);
ASSERT_EQ(130, b0);
EXPECT_EQ(130, r0);
EXPECT_EQ(130, g0);
EXPECT_EQ(130, b0);
YUVToRGB(128, 128, 128, &r1, &g1, &b1);
ASSERT_EQ(130, r1);
ASSERT_EQ(130, g1);
ASSERT_EQ(130, b1);
EXPECT_EQ(130, r1);
EXPECT_EQ(130, g1);
EXPECT_EQ(130, b1);
for (int y = 0; y < 256; ++y) {
YUVToRGBReference(y, 128, 128, &r0, &g0, &b0);
YUVToRGB(y, 128, 128, &r1, &g1, &b1);
YToRGB(y, &r2, &g2, &b2);
ASSERT_EQ(r0, r1);
ASSERT_EQ(g0, g1);
ASSERT_EQ(b0, b1);
ASSERT_EQ(r0, r2);
ASSERT_EQ(g0, g2);
ASSERT_EQ(b0, b2);
EXPECT_EQ(r0, r1);
EXPECT_EQ(g0, g1);
EXPECT_EQ(b0, b1);
EXPECT_EQ(r0, r2);
EXPECT_EQ(g0, g2);
EXPECT_EQ(b0, b2);
}
}
@ -608,11 +612,10 @@ static void PrintHistogram(int rh[256], int gh[256], int bh[256]) {
#ifdef DISABLE_SLOW_TESTS
#define FASTSTEP 5
#else
#define FASTSTEP 3
#define FASTSTEP 1
#endif
// BT.601 limited range.
#ifndef DISABLE_SLOW_TESTS
TEST_F(LibYUVColorTest, TestFullYUV) {
int rh[256] = {
0,
@ -623,16 +626,16 @@ TEST_F(LibYUVColorTest, TestFullYUV) {
int bh[256] = {
0,
};
for (int u = 0; u < 256; u += FASTSTEP) {
for (int v = 0; v < 256; v += FASTSTEP) {
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVToRGBReference(y, u, v, &r0, &g0, &b0);
YUVToRGB(y, u, v, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -653,16 +656,16 @@ TEST_F(LibYUVColorTest, TestFullYUVJ) {
int bh[256] = {
0,
};
for (int u = 0; u < 256; u += FASTSTEP) {
for (int v = 0; v < 256; v += FASTSTEP) {
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVJToRGBReference(y, u, v, &r0, &g0, &b0);
YUVJToRGB(y, u, v, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -683,16 +686,16 @@ TEST_F(LibYUVColorTest, TestFullYUVH) {
int bh[256] = {
0,
};
for (int u = 0; u < 256; u += FASTSTEP) {
for (int v = 0; v < 256; v += FASTSTEP) {
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVHToRGBReference(y, u, v, &r0, &g0, &b0);
YUVHToRGB(y, u, v, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -713,16 +716,16 @@ TEST_F(LibYUVColorTest, TestFullYUVF) {
int bh[256] = {
0,
};
for (int u = 0; u < 256; u += FASTSTEP) {
for (int v = 0; v < 256; v += FASTSTEP) {
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVFToRGBReference(y, u, v, &r0, &g0, &b0);
YUVFToRGB(y, u, v, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -743,16 +746,16 @@ TEST_F(LibYUVColorTest, TestFullYUVU) {
int bh[256] = {
0,
};
for (int u = 0; u < 256; u += FASTSTEP) {
for (int v = 0; v < 256; v += FASTSTEP) {
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVUToRGBReference(y, u, v, &r0, &g0, &b0);
YUVUToRGB(y, u, v, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, ERROR_G);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, ERROR_G);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -773,16 +776,16 @@ TEST_F(LibYUVColorTest, TestFullYUVV) {
int bh[256] = {
0,
};
for (int u = 0; u < 256; u += FASTSTEP) {
for (int v = 0; v < 256; v += FASTSTEP) {
for (int u = 0; u < 256; ++u) {
for (int v = 0; v < 256; ++v) {
for (int y2 = 0; y2 < 256; y2 += FASTSTEP) {
int r0, g0, b0, r1, g1, b1;
int y = RANDOM256(y2);
YUVVToRGBReference(y, u, v, &r0, &g0, &b0);
YUVVToRGB(y, u, v, &r1, &g1, &b1);
ASSERT_NEAR(r0, r1, ERROR_R);
ASSERT_NEAR(g0, g1, 2);
ASSERT_NEAR(b0, b1, ERROR_B);
EXPECT_NEAR(r0, r1, ERROR_R);
EXPECT_NEAR(g0, g1, 2);
EXPECT_NEAR(b0, b1, ERROR_B);
++rh[r1 - r0 + 128];
++gh[g1 - g0 + 128];
++bh[b1 - b0 + 128];
@ -791,8 +794,6 @@ TEST_F(LibYUVColorTest, TestFullYUVV) {
}
PrintHistogram(rh, gh, bh);
}
#endif // DISABLE_SLOW_TESTS
#undef FASTSTEP
TEST_F(LibYUVColorTest, TestGreyYUVJ) {
@ -800,47 +801,47 @@ TEST_F(LibYUVColorTest, TestGreyYUVJ) {
// black
YUVJToRGBReference(0, 128, 128, &r0, &g0, &b0);
ASSERT_EQ(0, r0);
ASSERT_EQ(0, g0);
ASSERT_EQ(0, b0);
EXPECT_EQ(0, r0);
EXPECT_EQ(0, g0);
EXPECT_EQ(0, b0);
YUVJToRGB(0, 128, 128, &r1, &g1, &b1);
ASSERT_EQ(0, r1);
ASSERT_EQ(0, g1);
ASSERT_EQ(0, b1);
EXPECT_EQ(0, r1);
EXPECT_EQ(0, g1);
EXPECT_EQ(0, b1);
// white
YUVJToRGBReference(255, 128, 128, &r0, &g0, &b0);
ASSERT_EQ(255, r0);
ASSERT_EQ(255, g0);
ASSERT_EQ(255, b0);
EXPECT_EQ(255, r0);
EXPECT_EQ(255, g0);
EXPECT_EQ(255, b0);
YUVJToRGB(255, 128, 128, &r1, &g1, &b1);
ASSERT_EQ(255, r1);
ASSERT_EQ(255, g1);
ASSERT_EQ(255, b1);
EXPECT_EQ(255, r1);
EXPECT_EQ(255, g1);
EXPECT_EQ(255, b1);
// grey
YUVJToRGBReference(128, 128, 128, &r0, &g0, &b0);
ASSERT_EQ(128, r0);
ASSERT_EQ(128, g0);
ASSERT_EQ(128, b0);
EXPECT_EQ(128, r0);
EXPECT_EQ(128, g0);
EXPECT_EQ(128, b0);
YUVJToRGB(128, 128, 128, &r1, &g1, &b1);
ASSERT_EQ(128, r1);
ASSERT_EQ(128, g1);
ASSERT_EQ(128, b1);
EXPECT_EQ(128, r1);
EXPECT_EQ(128, g1);
EXPECT_EQ(128, b1);
for (int y = 0; y < 256; ++y) {
YUVJToRGBReference(y, 128, 128, &r0, &g0, &b0);
YUVJToRGB(y, 128, 128, &r1, &g1, &b1);
YJToRGB(y, &r2, &g2, &b2);
ASSERT_EQ(r0, r1);
ASSERT_EQ(g0, g1);
ASSERT_EQ(b0, b1);
ASSERT_EQ(r0, r2);
ASSERT_EQ(g0, g2);
ASSERT_EQ(b0, b2);
EXPECT_EQ(r0, r1);
EXPECT_EQ(g0, g1);
EXPECT_EQ(b0, b1);
EXPECT_EQ(r0, r2);
EXPECT_EQ(g0, g2);
EXPECT_EQ(b0, b2);
}
}

View File

@ -48,7 +48,7 @@ TEST_F(LibYUVCompareTest, Djb2_Test) {
" together with Hermann Zapf";
uint32_t foxhash = HashDjb2(reinterpret_cast<const uint8_t*>(fox), 131, 5381);
const uint32_t kExpectedFoxHash = 2611006483u;
ASSERT_EQ(kExpectedFoxHash, foxhash);
EXPECT_EQ(kExpectedFoxHash, foxhash);
for (int i = 0; i < kMaxTest; ++i) {
src_a[i] = (fastrand() & 0xff);
@ -57,13 +57,13 @@ TEST_F(LibYUVCompareTest, Djb2_Test) {
// Compare different buffers. Expect hash is different.
uint32_t h1 = HashDjb2(src_a, kMaxTest, 5381);
uint32_t h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make last half same. Expect hash is different.
memcpy(src_a + kMaxTest / 2, src_b + kMaxTest / 2, kMaxTest / 2);
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make first half same. Expect hash is different.
memcpy(src_a + kMaxTest / 2, src_a, kMaxTest / 2);
@ -71,52 +71,52 @@ TEST_F(LibYUVCompareTest, Djb2_Test) {
memcpy(src_a, src_b, kMaxTest / 2);
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make same. Expect hash is same.
memcpy(src_a, src_b, kMaxTest);
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_EQ(h1, h2);
EXPECT_EQ(h1, h2);
// Mask seed different. Expect hash is different.
memcpy(src_a, src_b, kMaxTest);
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 1234);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make one byte different in middle. Expect hash is different.
memcpy(src_a, src_b, kMaxTest);
++src_b[kMaxTest / 2];
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make first byte different. Expect hash is different.
memcpy(src_a, src_b, kMaxTest);
++src_b[0];
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make last byte different. Expect hash is different.
memcpy(src_a, src_b, kMaxTest);
++src_b[kMaxTest - 1];
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_b, kMaxTest, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make a zeros. Test different lengths. Expect hash is different.
memset(src_a, 0, kMaxTest);
h1 = HashDjb2(src_a, kMaxTest, 5381);
h2 = HashDjb2(src_a, kMaxTest / 2, 5381);
ASSERT_NE(h1, h2);
EXPECT_NE(h1, h2);
// Make a zeros and seed of zero. Test different lengths. Expect hash is same.
memset(src_a, 0, kMaxTest);
h1 = HashDjb2(src_a, kMaxTest, 0);
h2 = HashDjb2(src_a, kMaxTest / 2, 0);
ASSERT_EQ(h1, h2);
EXPECT_EQ(h1, h2);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -134,7 +134,7 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Opt) {
for (int i = 0; i < benchmark_iterations_; ++i) {
h1 = HashDjb2(src_a, kMaxTest, 5381);
}
ASSERT_EQ(h1, h2);
EXPECT_EQ(h1, h2);
free_aligned_buffer_page_end(src_a);
}
@ -149,7 +149,7 @@ TEST_F(LibYUVCompareTest, BenchmarkDjb2_Unaligned) {
for (int i = 0; i < benchmark_iterations_; ++i) {
h1 = HashDjb2(src_a + 1, kMaxTest, 5381);
}
ASSERT_EQ(h1, h2);
EXPECT_EQ(h1, h2);
free_aligned_buffer_page_end(src_a);
}
@ -164,19 +164,19 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Opt) {
src_a[0] = 0;
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
ASSERT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc);
EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc);
src_a[0] = 255;
src_a[3] = 0;
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
ASSERT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc);
EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc);
src_a[3] = 255;
for (int i = 0; i < benchmark_iterations_; ++i) {
fourcc = ARGBDetect(src_a, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
}
ASSERT_EQ(0u, fourcc);
EXPECT_EQ(0u, fourcc);
free_aligned_buffer_page_end(src_a);
}
@ -192,19 +192,19 @@ TEST_F(LibYUVCompareTest, BenchmarkARGBDetect_Unaligned) {
src_a[0 + 1] = 0;
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
ASSERT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc);
EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_BGRA), fourcc);
src_a[0 + 1] = 255;
src_a[3 + 1] = 0;
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
ASSERT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc);
EXPECT_EQ(static_cast<uint32_t>(libyuv::FOURCC_ARGB), fourcc);
src_a[3 + 1] = 255;
for (int i = 0; i < benchmark_iterations_; ++i) {
fourcc = ARGBDetect(src_a + 1, benchmark_width_ * 4, benchmark_width_,
benchmark_height_);
}
ASSERT_EQ(0u, fourcc);
EXPECT_EQ(0u, fourcc);
free_aligned_buffer_page_end(src_a);
}
@ -221,7 +221,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
uint32_t h1 = HammingDistance_C(src_a, src_b, 16);
ASSERT_EQ(16u, h1);
EXPECT_EQ(16u, h1);
// Test C vs OPT on random buffer
MemRandomize(src_a, kMaxWidth);
@ -263,7 +263,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_Opt) {
h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
#endif
}
ASSERT_EQ(h0, h1);
EXPECT_EQ(h0, h1);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -280,7 +280,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) {
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
uint32_t h1 = HammingDistance_C(src_a, src_b, 16);
ASSERT_EQ(16u, h1);
EXPECT_EQ(16u, h1);
// Test C vs OPT on random buffer
MemRandomize(src_a, kMaxWidth);
@ -295,7 +295,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance_C) {
h1 = HammingDistance_C(src_a, src_b, kMaxWidth);
}
ASSERT_EQ(h0, h1);
EXPECT_EQ(h0, h1);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -311,7 +311,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) {
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
uint64_t h1 = ComputeHammingDistance(src_a, src_b, 16);
ASSERT_EQ(16u, h1);
EXPECT_EQ(16u, h1);
// Test C vs OPT on random buffer
MemRandomize(src_a, kMaxWidth);
@ -326,7 +326,7 @@ TEST_F(LibYUVCompareTest, BenchmarkHammingDistance) {
h1 = ComputeHammingDistance(src_a, src_b, kMaxWidth);
}
ASSERT_EQ(h0, h1);
EXPECT_EQ(h0, h1);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -351,7 +351,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
memset(src_b, 0u, kMaxWidth);
uint64_t h0 = ComputeHammingDistance(src_a, src_b, kMaxWidth);
ASSERT_EQ(kMaxWidth * 8ULL, h0);
EXPECT_EQ(kMaxWidth * 8ULL, h0);
for (int i = 0; i < benchmark_iterations_; ++i) {
#if defined(HAS_HAMMINGDISTANCE_NEON)
@ -389,7 +389,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance_Opt) {
// result can not be expected to be correct.
// TODO(fbarchard): Consider expecting the low 16 bits to match.
if (kMaxWidth <= kMaxOptCount) {
ASSERT_EQ(kMaxWidth * 8U, h1);
EXPECT_EQ(kMaxWidth * 8U, h1);
} else {
if (kMaxWidth * 8ULL != static_cast<uint64_t>(h1)) {
printf(
@ -420,7 +420,7 @@ TEST_F(LibYUVCompareTest, TestHammingDistance) {
h1 = ComputeHammingDistance(src_a, src_b,
benchmark_width_ * benchmark_height_);
}
ASSERT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1);
EXPECT_EQ(benchmark_width_ * benchmark_height_ * 8ULL, h1);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -436,7 +436,7 @@ TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) {
memcpy(src_a, "test0123test4567", 16);
memcpy(src_b, "tick0123tock4567", 16);
uint64_t h1 = ComputeSumSquareError(src_a, src_b, 16);
ASSERT_EQ(790u, h1);
EXPECT_EQ(790u, h1);
for (int i = 0; i < kMaxWidth; ++i) {
src_a[i] = i;
@ -452,7 +452,7 @@ TEST_F(LibYUVCompareTest, BenchmarkSumSquareError_Opt) {
h1 = ComputeSumSquareError(src_a, src_b, kMaxWidth);
}
ASSERT_EQ(0u, h1);
EXPECT_EQ(0u, h1);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -468,18 +468,18 @@ TEST_F(LibYUVCompareTest, SumSquareError) {
uint64_t err;
err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
ASSERT_EQ(0u, err);
EXPECT_EQ(0u, err);
memset(src_a, 1, kMaxWidth);
err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
ASSERT_EQ(static_cast<int>(err), kMaxWidth);
EXPECT_EQ(static_cast<int>(err), kMaxWidth);
memset(src_a, 190, kMaxWidth);
memset(src_b, 193, kMaxWidth);
err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
ASSERT_EQ(static_cast<int>(err), kMaxWidth * 3 * 3);
EXPECT_EQ(static_cast<int>(err), kMaxWidth * 3 * 3);
for (int i = 0; i < kMaxWidth; ++i) {
src_a[i] = (fastrand() & 0xff);
@ -492,7 +492,7 @@ TEST_F(LibYUVCompareTest, SumSquareError) {
MaskCpuFlags(benchmark_cpu_info_);
uint64_t opt_err = ComputeSumSquareError(src_a, src_b, kMaxWidth);
ASSERT_EQ(c_err, opt_err);
EXPECT_EQ(c_err, opt_err);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -517,7 +517,7 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Opt) {
opt_time = (get_time() - opt_time) / benchmark_iterations_;
printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6);
ASSERT_EQ(0, 0);
EXPECT_EQ(0, 0);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -542,7 +542,7 @@ TEST_F(LibYUVCompareTest, BenchmarkPsnr_Unaligned) {
opt_time = (get_time() - opt_time) / benchmark_iterations_;
printf("BenchmarkPsnr_Opt - %8.2f us opt\n", opt_time * 1e6);
ASSERT_EQ(0, 0);
EXPECT_EQ(0, 0);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -564,7 +564,7 @@ TEST_F(LibYUVCompareTest, Psnr) {
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
kSrcHeight);
ASSERT_EQ(err, kMaxPsnr);
EXPECT_EQ(err, kMaxPsnr);
memset(src_a, 255, kSrcPlaneSize);
@ -572,7 +572,7 @@ TEST_F(LibYUVCompareTest, Psnr) {
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
kSrcHeight);
ASSERT_EQ(err, 0.0);
EXPECT_EQ(err, 0.0);
memset(src_a, 1, kSrcPlaneSize);
@ -580,8 +580,8 @@ TEST_F(LibYUVCompareTest, Psnr) {
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
kSrcHeight);
ASSERT_GT(err, 48.0);
ASSERT_LT(err, 49.0);
EXPECT_GT(err, 48.0);
EXPECT_LT(err, 49.0);
for (int i = 0; i < kSrcPlaneSize; ++i) {
src_a[i] = i;
@ -591,9 +591,9 @@ TEST_F(LibYUVCompareTest, Psnr) {
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
kSrcHeight);
ASSERT_GT(err, 2.0);
EXPECT_GT(err, 2.0);
if (kSrcWidth * kSrcHeight >= 256) {
ASSERT_LT(err, 6.0);
EXPECT_LT(err, 6.0);
}
memset(src_a, 0, kSrcPlaneSize);
@ -619,7 +619,7 @@ TEST_F(LibYUVCompareTest, Psnr) {
src_b + kSrcStride * b + b, kSrcStride, kSrcWidth,
kSrcHeight);
ASSERT_EQ(opt_err, c_err);
EXPECT_EQ(opt_err, c_err);
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -644,7 +644,7 @@ TEST_F(LibYUVCompareTest, DISABLED_BenchmarkSsim_Opt) {
opt_time = (get_time() - opt_time) / benchmark_iterations_;
printf("BenchmarkSsim_Opt - %8.2f us opt\n", opt_time * 1e6);
ASSERT_EQ(0, 0); // Pass if we get this far.
EXPECT_EQ(0, 0); // Pass if we get this far.
free_aligned_buffer_page_end(src_a);
free_aligned_buffer_page_end(src_b);
@ -671,7 +671,7 @@ TEST_F(LibYUVCompareTest, Ssim) {
kSrcHeight);
if (kSrcWidth > 8 && kSrcHeight > 8) {
ASSERT_EQ(err, 1.0);
EXPECT_EQ(err, 1.0);
}
memset(src_a, 255, kSrcPlaneSize);
@ -681,7 +681,7 @@ TEST_F(LibYUVCompareTest, Ssim) {
kSrcHeight);
if (kSrcWidth > 8 && kSrcHeight > 8) {
ASSERT_LT(err, 0.0001);
EXPECT_LT(err, 0.0001);
}
memset(src_a, 1, kSrcPlaneSize);
@ -691,8 +691,8 @@ TEST_F(LibYUVCompareTest, Ssim) {
kSrcHeight);
if (kSrcWidth > 8 && kSrcHeight > 8) {
ASSERT_GT(err, 0.0001);
ASSERT_LT(err, 0.9);
EXPECT_GT(err, 0.0001);
EXPECT_LT(err, 0.9);
}
for (int i = 0; i < kSrcPlaneSize; ++i) {
@ -704,8 +704,8 @@ TEST_F(LibYUVCompareTest, Ssim) {
kSrcHeight);
if (kSrcWidth > 8 && kSrcHeight > 8) {
ASSERT_GT(err, 0.0);
ASSERT_LT(err, 0.01);
EXPECT_GT(err, 0.0);
EXPECT_LT(err, 0.01);
}
for (int i = b; i < (kSrcHeight + b); ++i) {
@ -729,7 +729,7 @@ TEST_F(LibYUVCompareTest, Ssim) {
kSrcHeight);
if (kSrcWidth > 8 && kSrcHeight > 8) {
ASSERT_EQ(opt_err, c_err);
EXPECT_EQ(opt_err, c_err);
}
free_aligned_buffer_page_end(src_a);

View File

@ -53,9 +53,9 @@ namespace libyuv {
#define ABGRToABGR ARGBCopy
// subsample amount uses a divide.
#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
#define ALIGNINT(V, ALIGN) (((V) + (ALIGN) - 1) / (ALIGN) * (ALIGN))
#define ALIGNINT(V, ALIGN) (((V) + (ALIGN)-1) / (ALIGN) * (ALIGN))
#define TESTBPTOPI(SRC_FMT_PLANAR, SRC_T, SRC_BPC, SRC_SUBSAMP_X, \
SRC_SUBSAMP_Y, FMT_PLANAR, DST_T, DST_BPC, DST_SUBSAMP_X, \
@ -82,19 +82,15 @@ namespace libyuv {
(kHeight + (TILE_HEIGHT - 1)) & ~(TILE_HEIGHT - 1); \
const int kSrcHalfPaddedWidth = SUBSAMPLE(kPaddedWidth, SRC_SUBSAMP_X); \
const int kSrcHalfPaddedHeight = SUBSAMPLE(kPaddedHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, \
kPaddedWidth * kPaddedHeight * SRC_BPC + OFF); \
align_buffer_page_end(src_y, kPaddedWidth* kPaddedHeight* SRC_BPC + OFF); \
align_buffer_page_end( \
src_uv, \
kSrcHalfPaddedWidth * kSrcHalfPaddedHeight * SRC_BPC * 2 + OFF); \
align_buffer_page_end(dst_y_c, kWidth * kHeight * DST_BPC); \
align_buffer_page_end(dst_u_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_v_c, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth * kHeight * DST_BPC); \
align_buffer_page_end(dst_u_opt, \
kDstHalfWidth * kDstHalfHeight * DST_BPC); \
align_buffer_page_end(dst_v_opt, \
kDstHalfWidth * kDstHalfHeight * DST_BPC); \
src_uv, kSrcHalfPaddedWidth* kSrcHalfPaddedHeight* SRC_BPC * 2 + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_u_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
align_buffer_page_end(dst_v_c, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight* DST_BPC); \
align_buffer_page_end(dst_u_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
align_buffer_page_end(dst_v_opt, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
SRC_T* src_y_p = reinterpret_cast<SRC_T*>(src_y + OFF); \
SRC_T* src_uv_p = reinterpret_cast<SRC_T*>(src_uv + OFF); \
for (int i = 0; i < kPaddedWidth * kPaddedHeight; ++i) { \
@ -105,12 +101,12 @@ namespace libyuv {
src_uv_p[i] = \
(fastrand() & (((SRC_T)(-1)) << ((8 * SRC_BPC) - SRC_DEPTH))); \
} \
memset(dst_y_c, 1, kWidth * kHeight * DST_BPC); \
memset(dst_u_c, 2, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_v_c, 3, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_y_opt, 101, kWidth * kHeight * DST_BPC); \
memset(dst_u_opt, 102, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_v_opt, 103, kDstHalfWidth * kDstHalfHeight * DST_BPC); \
memset(dst_y_c, 1, kWidth* kHeight* DST_BPC); \
memset(dst_u_c, 2, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
memset(dst_v_c, 3, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
memset(dst_y_opt, 101, kWidth* kHeight* DST_BPC); \
memset(dst_u_opt, 102, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
memset(dst_v_opt, 103, kDstHalfWidth* kDstHalfHeight* DST_BPC); \
MaskCpuFlags(disable_cpu_flags_); \
SRC_FMT_PLANAR##To##FMT_PLANAR( \
src_y_p, kWidth, src_uv_p, kSrcHalfWidth * 2, \
@ -128,11 +124,11 @@ namespace libyuv {
NEG kHeight); \
} \
for (int i = 0; i < kHeight * kWidth * DST_BPC; ++i) { \
ASSERT_EQ(dst_y_c[i], dst_y_opt[i]); \
EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
} \
for (int i = 0; i < kDstHalfWidth * kDstHalfHeight * DST_BPC; ++i) { \
ASSERT_EQ(dst_u_c[i], dst_u_opt[i]); \
ASSERT_EQ(dst_v_c[i], dst_v_opt[i]); \
EXPECT_EQ(dst_u_c[i], dst_u_opt[i]); \
EXPECT_EQ(dst_v_c[i], dst_v_opt[i]); \
} \
free_aligned_buffer_page_end(dst_y_c); \
free_aligned_buffer_page_end(dst_u_c); \
@ -227,11 +223,11 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, kSizeUV + OFF); \
align_buffer_page_end(src_v, kSizeUV + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
src_y[i + OFF] = (fastrand() & 0xff); \
} \
@ -258,7 +254,7 @@ TESTBPTOP(P012, uint16_t, 2, 2, 2, I012, uint16_t, 2, 2, 2, 12, 1, 1)
static_cast<int>((time1 - time0) * 1e6), \
static_cast<int>((time2 - time1) * 1e6 / benchmark_iterations_)); \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -385,58 +381,58 @@ TESTPLANARTOB(I444, 1, 1, ABGR, 4, 4, 1)
TESTPLANARTOB(I444, 1, 1, ARGB, 4, 4, 1)
#endif
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideB = kWidth * BPP_B; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end( \
src_uv, kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < kStrideUV * 2; ++j) { \
src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \
} \
} \
memset(dst_argb_c, 1, kStrideB * kHeight); \
memset(dst_argb_opt, 101, kStrideB * kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
dst_argb_opt, kWidth * BPP_B, kWidth, \
NEG kHeight); \
} \
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
kHeight); \
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
kHeight); \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth * 4; ++j) { \
ASSERT_EQ(dst_argb32_c[i * kWidth * 4 + j], \
dst_argb32_opt[i * kWidth * 4 + j]); \
} \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
free_aligned_buffer_page_end(dst_argb32_c); \
free_aligned_buffer_page_end(dst_argb32_opt); \
#define TESTBPTOBI(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, FMT_C, BPP_B, \
W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_PLANAR##To##FMT_B##N) { \
const int kWidth = W1280; \
const int kHeight = benchmark_height_; \
const int kStrideB = kWidth * BPP_B; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, \
kStrideUV* SUBSAMPLE(kHeight, SUBSAMP_Y) * 2 + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight); \
for (int i = 0; i < kHeight; ++i) \
for (int j = 0; j < kWidth; ++j) \
src_y[i * kWidth + j + OFF] = (fastrand() & 0xff); \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < kStrideUV * 2; ++j) { \
src_uv[i * kStrideUV * 2 + j + OFF] = (fastrand() & 0xff); \
} \
} \
memset(dst_argb_c, 1, kStrideB* kHeight); \
memset(dst_argb_opt, 101, kStrideB* kHeight); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
dst_argb_c, kWidth * BPP_B, kWidth, NEG kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
FMT_PLANAR##To##FMT_B(src_y + OFF, kWidth, src_uv + OFF, kStrideUV * 2, \
dst_argb_opt, kWidth * BPP_B, kWidth, \
NEG kHeight); \
} \
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
align_buffer_page_end(dst_argb32_c, kWidth * 4 * kHeight); \
align_buffer_page_end(dst_argb32_opt, kWidth * 4 * kHeight); \
memset(dst_argb32_c, 2, kWidth * 4 * kHeight); \
memset(dst_argb32_opt, 102, kWidth * 4 * kHeight); \
FMT_C##ToARGB(dst_argb_c, kStrideB, dst_argb32_c, kWidth * 4, kWidth, \
kHeight); \
FMT_C##ToARGB(dst_argb_opt, kStrideB, dst_argb32_opt, kWidth * 4, kWidth, \
kHeight); \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth * 4; ++j) { \
EXPECT_EQ(dst_argb32_c[i * kWidth * 4 + j], \
dst_argb32_opt[i * kWidth * 4 + j]); \
} \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
free_aligned_buffer_page_end(dst_argb32_c); \
free_aligned_buffer_page_end(dst_argb32_opt); \
}
#if defined(ENABLE_FULL_TESTS)
@ -511,16 +507,15 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, \
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, \
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideB * kHeightB); \
memset(dst_argb_opt, 101, kStrideB * kHeightB); \
memset(dst_argb_c, 1, kStrideB* kHeightB); \
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, NEG kHeight); \
@ -530,49 +525,48 @@ TESTBPTOB(NV12, 2, 2, RGB565, RGB565, 2)
(TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
}
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (fastrand() & 63) + 1; \
const int kHeight = (fastrand() & 31) + 1; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, \
kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_c, \
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB * kHeightB * (int)sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i] = 0xfe; \
} \
memset(dst_argb_c, 123, kStrideB * kHeightB); \
memset(dst_argb_opt, 123, kStrideB * kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
kStrideB, kWidth, kHeight); \
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
} \
#define TESTATOBRANDOM(FMT_A, TYPE_A, EPP_A, STRIDE_A, HEIGHT_A, FMT_B, \
TYPE_B, EPP_B, STRIDE_B, HEIGHT_B) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##_Random) { \
for (int times = 0; times < benchmark_iterations_; ++times) { \
const int kWidth = (fastrand() & 63) + 1; \
const int kHeight = (fastrand() & 31) + 1; \
const int kHeightA = (kHeight + HEIGHT_A - 1) / HEIGHT_A * HEIGHT_A; \
const int kHeightB = (kHeight + HEIGHT_B - 1) / HEIGHT_B * HEIGHT_B; \
const int kStrideA = \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_c, \
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
align_buffer_page_end(dst_argb_opt, \
kStrideB* kHeightB*(int)sizeof(TYPE_B)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i] = 0xfe; \
} \
memset(dst_argb_c, 123, kStrideB* kHeightB); \
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_c, \
kStrideB, kWidth, kHeight); \
MaskCpuFlags(benchmark_cpu_info_); \
FMT_A##To##FMT_B((TYPE_A*)src_argb, kStrideA, (TYPE_B*)dst_argb_opt, \
kStrideB, kWidth, kHeight); \
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
free_aligned_buffer_page_end(dst_argb_opt); \
} \
}
#if defined(ENABLE_FULL_TESTS)
@ -678,11 +672,11 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
const int kStrideB = \
(kWidth * EPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, \
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, \
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_opt, \
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
@ -703,7 +697,7 @@ TESTATOB(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
FMT_A##To##FMT_B((TYPE_A*)(dst_argb_opt /* src */ + OFF), kStrideA, \
(TYPE_B*)dst_argb_opt, kStrideB, kWidth, NEG kHeight); \
for (int i = 0; i < kStrideB * kHeightB * (int)sizeof(TYPE_B); ++i) { \
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
@ -797,14 +791,14 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA * kHeightA + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
align_buffer_page_end(src_argb, kStrideA* kHeightA + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideB * kHeightB); \
memset(dst_argb_opt, 101, kStrideB * kHeightB); \
memset(dst_argb_c, 1, kStrideB* kHeightB); \
memset(dst_argb_opt, 101, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B##Dither(src_argb + OFF, kStrideA, dst_argb_c, kStrideB, \
NULL, kWidth, NEG kHeight); \
@ -814,7 +808,7 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
kStrideB, NULL, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
@ -833,14 +827,14 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
(kWidth * BPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
const int kStrideB = \
(kWidth * BPP_B + STRIDE_B - 1) / STRIDE_B * STRIDE_B; \
align_buffer_page_end(src_argb, kStrideA * kHeightA); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeightB); \
align_buffer_page_end(src_argb, kStrideA* kHeightA); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeightB); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeightB); \
for (int i = 0; i < kStrideA * kHeightA; ++i) { \
src_argb[i] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 123, kStrideB * kHeightB); \
memset(dst_argb_opt, 123, kStrideB * kHeightB); \
memset(dst_argb_c, 123, kStrideB* kHeightB); \
memset(dst_argb_opt, 123, kStrideB* kHeightB); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_c, kStrideB, NULL, \
kWidth, kHeight); \
@ -848,7 +842,7 @@ TESTATOA(AB64, uint16_t, 4, 4, 1, AR64, uint16_t, 4, 4, 1)
FMT_A##To##FMT_B##Dither(src_argb, kStrideA, dst_argb_opt, kStrideB, \
NULL, kWidth, kHeight); \
for (int i = 0; i < kStrideB * kHeightB; ++i) { \
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
@ -891,16 +885,15 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
const int kStrideA = \
(kWidth * EPP_A + STRIDE_A - 1) / STRIDE_A * STRIDE_A; \
align_buffer_page_end(src_argb, \
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, \
kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_argb_c, kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
align_buffer_page_end(dst_argb_opt, \
kStrideA * kHeightA * (int)sizeof(TYPE_A)); \
kStrideA* kHeightA*(int)sizeof(TYPE_A)); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_argb_c, 1, kStrideA * kHeightA); \
memset(dst_argb_opt, 101, kStrideA * kHeightA); \
memset(dst_argb_c, 1, kStrideA* kHeightA); \
memset(dst_argb_opt, 101, kStrideA* kHeightA); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_ATOB((TYPE_A*)(src_argb + OFF), kStrideA, (TYPE_A*)dst_argb_c, \
kStrideA, kWidth, NEG kHeight); \
@ -916,8 +909,8 @@ TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1)
FMT_ATOB((TYPE_A*)dst_argb_opt, kStrideA, (TYPE_A*)dst_argb_opt, kStrideA, \
kWidth, NEG kHeight); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
ASSERT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
EXPECT_EQ(src_argb[i + OFF], dst_argb_opt[i]); \
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_argb_c); \
@ -952,12 +945,12 @@ TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, kSizeUV + OFF); \
align_buffer_page_end(src_v, kSizeUV + OFF); \
align_buffer_page_end(src_a, kWidth * kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
src_y[i + OFF] = (fastrand() & 0xff); \
src_a[i + OFF] = (fastrand() & 0xff); \
@ -981,7 +974,7 @@ TESTEND(AB64ToAR64, uint16_t, 4, 4, 1)
ATTEN); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -1171,7 +1164,7 @@ TEST_F(LibYUVConvertTest, TestYToARGB) {
argb[i * 4 + 0], argb[i * 4 + 1], argb[i * 4 + 2], argb[i * 4 + 3]);
}
for (int i = 0; i < 32; ++i) {
ASSERT_EQ(expectedg[i], argb[i * 4 + 0]);
EXPECT_EQ(expectedg[i], argb[i * 4 + 0]);
}
}
@ -1193,7 +1186,7 @@ TEST_F(LibYUVConvertTest, TestNoDither) {
benchmark_width_ * 2, kNoDither4x4, benchmark_width_,
benchmark_height_);
for (int i = 0; i < benchmark_width_ * benchmark_height_ * 2; ++i) {
ASSERT_EQ(dst_rgb565[i], dst_rgb565dither[i]);
EXPECT_EQ(dst_rgb565[i], dst_rgb565dither[i]);
}
free_aligned_buffer_page_end(src_argb);
@ -1230,7 +1223,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
benchmark_width_ * 4, benchmark_width_, benchmark_height_);
for (int i = 0; i < benchmark_width_ * benchmark_height_ * 4; ++i) {
ASSERT_NEAR(dst_argb[i], dst_argbdither[i], 9);
EXPECT_NEAR(dst_argb[i], dst_argbdither[i], 9);
}
free_aligned_buffer_page_end(src_argb);
free_aligned_buffer_page_end(dst_rgb565);
@ -1247,11 +1240,11 @@ TEST_F(LibYUVConvertTest, TestDither) {
const int kStrideB = ALIGNINT(kWidth * BPP_B, ALIGN); \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, kSizeUV + OFF); \
align_buffer_page_end(src_v, kSizeUV + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
src_y[i + OFF] = (fastrand() & 0xff); \
} \
@ -1272,16 +1265,16 @@ TEST_F(LibYUVConvertTest, TestDither) {
dst_argb_opt + OFF, kStrideB, NULL, kWidth, NEG kHeight); \
} \
/* Convert to ARGB so 565 is expanded to bytes that can be compared. */ \
align_buffer_page_end(dst_argb32_c, kWidth * BPP_C * kHeight); \
align_buffer_page_end(dst_argb32_opt, kWidth * BPP_C * kHeight); \
memset(dst_argb32_c, 2, kWidth * BPP_C * kHeight); \
memset(dst_argb32_opt, 102, kWidth * BPP_C * kHeight); \
align_buffer_page_end(dst_argb32_c, kWidth* BPP_C* kHeight); \
align_buffer_page_end(dst_argb32_opt, kWidth* BPP_C* kHeight); \
memset(dst_argb32_c, 2, kWidth* BPP_C* kHeight); \
memset(dst_argb32_opt, 102, kWidth* BPP_C* kHeight); \
FMT_B##To##FMT_C(dst_argb_c + OFF, kStrideB, dst_argb32_c, kWidth * BPP_C, \
kWidth, kHeight); \
FMT_B##To##FMT_C(dst_argb_opt + OFF, kStrideB, dst_argb32_opt, \
kWidth * BPP_C, kWidth, kHeight); \
for (int i = 0; i < kWidth * BPP_C * kHeight; ++i) { \
ASSERT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \
EXPECT_EQ(dst_argb32_c[i], dst_argb32_opt[i]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -1324,10 +1317,10 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, kSizeUV + OFF); \
align_buffer_page_end(src_v, kSizeUV + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
src_y[i + OFF] = (fastrand() & 0xff); \
} \
@ -1341,8 +1334,8 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
kWidth, NEG kHeight); \
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
const int kStrideC = kWidth * BPP_C; \
align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
@ -1354,7 +1347,7 @@ TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, ARGB, 4)
kStrideC, kWidth, kHeight); \
} \
for (int i = 0; i < kStrideC * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -1471,14 +1464,14 @@ TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
const int kSizeUV = \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_u, kSizeUV + OFF); \
align_buffer_page_end(src_v, kSizeUV + OFF); \
align_buffer_page_end(src_a, kWidth * kHeight + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \
align_buffer_page_end(src_a, kWidth* kHeight + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
const int kStrideC = kWidth * BPP_C; \
align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
@ -1506,7 +1499,7 @@ TESTPLANARTOE(I444, 1, 1, ABGR, 1, 4, ARGB, 4)
src_v + OFF, SUBSAMPLE(kWidth, SUBSAMP_X), src_a + OFF, kWidth, \
dst_argb_c + OFF, kStrideC, kWidth, NEG kHeight, ATTEN); \
for (int i = 0; i < kStrideC * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_bc[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -1585,16 +1578,16 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
const int kHeight = benchmark_height_; \
const int kStrideA = SUBSAMPLE(kWidth, SUB_A) * BPP_A; \
const int kStrideB = SUBSAMPLE(kWidth, SUB_B) * BPP_B; \
align_buffer_page_end(src_argb_a, kStrideA * kHeight + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB * kHeight + OFF); \
align_buffer_page_end(src_argb_a, kStrideA* kHeight + OFF); \
align_buffer_page_end(dst_argb_b, kStrideB* kHeight + OFF); \
MemRandomize(src_argb_a + OFF, kStrideA * kHeight); \
memset(dst_argb_b + OFF, 1, kStrideB * kHeight); \
FMT_A##To##FMT_B(src_argb_a + OFF, kStrideA, dst_argb_b + OFF, kStrideB, \
kWidth, NEG kHeight); \
/* Convert to a 3rd format in 1 step and 2 steps and compare */ \
const int kStrideC = kWidth * BPP_C; \
align_buffer_page_end(dst_argb_c, kStrideC * kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC * kHeight + OFF); \
align_buffer_page_end(dst_argb_c, kStrideC* kHeight + OFF); \
align_buffer_page_end(dst_argb_bc, kStrideC* kHeight + OFF); \
memset(dst_argb_c + OFF, 2, kStrideC * kHeight); \
memset(dst_argb_bc + OFF, 3, kStrideC * kHeight); \
for (int i = 0; i < benchmark_iterations_; ++i) { \
@ -1605,10 +1598,10 @@ TESTQPLANARTOE(I444Alpha, 1, 1, ABGR, 1, 4, ARGB, 4)
kStrideC, kWidth, kHeight); \
} \
for (int i = 0; i < kStrideC * kHeight; i += 4) { \
ASSERT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \
ASSERT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \
ASSERT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \
ASSERT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \
EXPECT_EQ(dst_argb_c[i + OFF + 0], dst_argb_bc[i + OFF + 0]); \
EXPECT_EQ(dst_argb_c[i + OFF + 1], dst_argb_bc[i + OFF + 1]); \
EXPECT_EQ(dst_argb_c[i + OFF + 2], dst_argb_bc[i + OFF + 2]); \
EXPECT_NEAR(dst_argb_c[i + OFF + 3], dst_argb_bc[i + OFF + 3], 64); \
} \
free_aligned_buffer_page_end(src_argb_a); \
free_aligned_buffer_page_end(dst_argb_b); \
@ -1671,12 +1664,12 @@ TEST_F(LibYUVConvertTest, RotateWithARGBSource) {
2, // crop height
kRotate90, FOURCC_ARGB);
ASSERT_EQ(r, 0);
EXPECT_EQ(r, 0);
// 90 degrees rotation, no conversion
ASSERT_EQ(dst[0], src[2]);
ASSERT_EQ(dst[1], src[0]);
ASSERT_EQ(dst[2], src[3]);
ASSERT_EQ(dst[3], src[1]);
EXPECT_EQ(dst[0], src[2]);
EXPECT_EQ(dst[1], src[0]);
EXPECT_EQ(dst[2], src[3]);
EXPECT_EQ(dst[3], src[1]);
}
#ifdef HAS_ARGBTOAR30ROW_AVX2
@ -1704,7 +1697,7 @@ TEST_F(LibYUVConvertTest, ARGBToAR30Row_Opt) {
}
}
for (int i = 0; i < kPixels * 4; ++i) {
ASSERT_EQ(dst_opt[i], dst_c[i]);
EXPECT_EQ(dst_opt[i], dst_c[i]);
}
free_aligned_buffer_page_end(src);
@ -1738,7 +1731,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
}
}
for (int i = 0; i < kPixels * 4; ++i) {
ASSERT_EQ(dst_opt[i], dst_c[i]);
EXPECT_EQ(dst_opt[i], dst_c[i]);
}
free_aligned_buffer_page_end(src);
@ -1805,11 +1798,11 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth * kHeight * kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV * kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV * kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + DOFF); \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = (fastrand() & FMT_MASK); \
} \
@ -1834,7 +1827,7 @@ TEST_F(LibYUVConvertTest, ABGRToAR30Row_Opt) {
dst_argb_opt + DOFF, kStrideB, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -1920,12 +1913,12 @@ TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X); \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth * kHeight * kBpc + OFF); \
align_buffer_page_end(src_u, kSizeUV * kBpc + OFF); \
align_buffer_page_end(src_v, kSizeUV * kBpc + OFF); \
align_buffer_page_end(src_a, kWidth * kHeight * kBpc + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + OFF); \
align_buffer_page_end(src_u, kSizeUV* kBpc + OFF); \
align_buffer_page_end(src_v, kSizeUV* kBpc + OFF); \
align_buffer_page_end(src_a, kWidth* kHeight* kBpc + OFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + OFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + OFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + OFF)[i] = \
(fastrand() & ((1 << S_DEPTH) - 1)); \
@ -1957,7 +1950,7 @@ TESTPLANAR16TOB(I210, 2, 1, 0x3ff, AR30Filter, 4, 4, 1)
dst_argb_opt + OFF, kStrideB, kWidth, NEG kHeight, ATTEN); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
EXPECT_EQ(dst_argb_c[i + OFF], dst_argb_opt[i + OFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_u); \
@ -2153,10 +2146,10 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y) * 2; \
const int kBpc = 2; \
align_buffer_page_end(src_y, kWidth * kHeight * kBpc + SOFF); \
align_buffer_page_end(src_uv, kSizeUV * kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB * kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB * kHeight + DOFF); \
align_buffer_page_end(src_y, kWidth* kHeight* kBpc + SOFF); \
align_buffer_page_end(src_uv, kSizeUV* kBpc + SOFF); \
align_buffer_page_end(dst_argb_c, kStrideB* kHeight + DOFF); \
align_buffer_page_end(dst_argb_opt, kStrideB* kHeight + DOFF); \
for (int i = 0; i < kWidth * kHeight; ++i) { \
reinterpret_cast<uint16_t*>(src_y + SOFF)[i] = \
(fastrand() & (((uint16_t)(-1)) << (16 - S_DEPTH))); \
@ -2180,7 +2173,7 @@ TESTQPLANAR16TOB(I210Alpha, 2, 1, ARGBFilter, 4, 4, 1, 10)
NEG kHeight); \
} \
for (int i = 0; i < kWidth * BPP_B * kHeight; ++i) { \
ASSERT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
EXPECT_EQ(dst_argb_c[i + DOFF], dst_argb_opt[i + DOFF]); \
} \
free_aligned_buffer_page_end(src_y); \
free_aligned_buffer_page_end(src_uv); \
@ -2323,10 +2316,10 @@ TEST_F(LibYUVConvertTest, TestH420ToARGB) {
++histogram_r[r];
// Reference formula for Y channel contribution in YUV to RGB conversions:
int expected_y = Clamp(static_cast<int>((i - 16) * 1.164f + 0.5f));
ASSERT_EQ(b, expected_y);
ASSERT_EQ(g, expected_y);
ASSERT_EQ(r, expected_y);
ASSERT_EQ(a, 255);
EXPECT_EQ(b, expected_y);
EXPECT_EQ(g, expected_y);
EXPECT_EQ(r, expected_y);
EXPECT_EQ(a, 255);
}
int count_b = 0;
@ -2384,10 +2377,10 @@ TEST_F(LibYUVConvertTest, TestH010ToARGB) {
++histogram_g[g];
++histogram_r[r];
int expected_y = Clamp(static_cast<int>((i - 64) * 1.164f / 4));
ASSERT_NEAR(b, expected_y, 1);
ASSERT_NEAR(g, expected_y, 1);
ASSERT_NEAR(r, expected_y, 1);
ASSERT_EQ(a, 255);
EXPECT_NEAR(b, expected_y, 1);
EXPECT_NEAR(g, expected_y, 1);
EXPECT_NEAR(r, expected_y, 1);
EXPECT_EQ(a, 255);
}
int count_b = 0;
@ -2448,10 +2441,10 @@ TEST_F(LibYUVConvertTest, TestH010ToAR30) {
++histogram_g[g10];
++histogram_r[r10];
int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f + 0.5));
ASSERT_NEAR(b10, expected_y, 4);
ASSERT_NEAR(g10, expected_y, 4);
ASSERT_NEAR(r10, expected_y, 4);
ASSERT_EQ(a2, 3);
EXPECT_NEAR(b10, expected_y, 4);
EXPECT_NEAR(g10, expected_y, 4);
EXPECT_NEAR(r10, expected_y, 4);
EXPECT_EQ(a2, 3);
}
int count_b = 0;
@ -2512,10 +2505,10 @@ TEST_F(LibYUVConvertTest, TestH010ToAB30) {
++histogram_g[g10];
++histogram_r[r10];
int expected_y = Clamp10(static_cast<int>((i - 64) * 1.164f));
ASSERT_NEAR(b10, expected_y, 4);
ASSERT_NEAR(g10, expected_y, 4);
ASSERT_NEAR(r10, expected_y, 4);
ASSERT_EQ(a2, 3);
EXPECT_NEAR(b10, expected_y, 4);
EXPECT_NEAR(g10, expected_y, 4);
EXPECT_NEAR(r10, expected_y, 4);
EXPECT_EQ(a2, 3);
}
int count_b = 0;
@ -2574,10 +2567,10 @@ TEST_F(LibYUVConvertTest, TestH420ToAR30) {
++histogram_g[g10];
++histogram_r[r10];
int expected_y = Clamp10(static_cast<int>((i - 16) * 1.164f * 4.f));
ASSERT_NEAR(b10, expected_y, 4);
ASSERT_NEAR(g10, expected_y, 4);
ASSERT_NEAR(r10, expected_y, 4);
ASSERT_EQ(a2, 3);
EXPECT_NEAR(b10, expected_y, 4);
EXPECT_NEAR(g10, expected_y, 4);
EXPECT_NEAR(r10, expected_y, 4);
EXPECT_EQ(a2, 3);
}
int count_b = 0;
@ -2624,34 +2617,34 @@ TEST_F(LibYUVConvertTest, TestI400) {
I400ToARGBMatrix(orig_i400, 0, argb_pixels_2020_i400, 0, &kYuv2020Constants,
kSize, 1);
ASSERT_EQ(0, argb_pixels_i400[0]);
ASSERT_EQ(0, argb_pixels_j400[0]);
ASSERT_EQ(0, argb_pixels_jpeg_i400[0]);
ASSERT_EQ(0, argb_pixels_h709_i400[0]);
ASSERT_EQ(0, argb_pixels_2020_i400[0]);
ASSERT_EQ(0, argb_pixels_i400[16 * 4]);
ASSERT_EQ(16, argb_pixels_j400[16 * 4]);
ASSERT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
ASSERT_EQ(0, argb_pixels_h709_i400[16 * 4]);
ASSERT_EQ(0, argb_pixels_2020_i400[16 * 4]);
ASSERT_EQ(130, argb_pixels_i400[128 * 4]);
ASSERT_EQ(128, argb_pixels_j400[128 * 4]);
ASSERT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
ASSERT_EQ(130, argb_pixels_h709_i400[128 * 4]);
ASSERT_EQ(130, argb_pixels_2020_i400[128 * 4]);
ASSERT_EQ(255, argb_pixels_i400[255 * 4]);
ASSERT_EQ(255, argb_pixels_j400[255 * 4]);
ASSERT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
ASSERT_EQ(255, argb_pixels_h709_i400[255 * 4]);
ASSERT_EQ(255, argb_pixels_2020_i400[255 * 4]);
EXPECT_EQ(0, argb_pixels_i400[0]);
EXPECT_EQ(0, argb_pixels_j400[0]);
EXPECT_EQ(0, argb_pixels_jpeg_i400[0]);
EXPECT_EQ(0, argb_pixels_h709_i400[0]);
EXPECT_EQ(0, argb_pixels_2020_i400[0]);
EXPECT_EQ(0, argb_pixels_i400[16 * 4]);
EXPECT_EQ(16, argb_pixels_j400[16 * 4]);
EXPECT_EQ(16, argb_pixels_jpeg_i400[16 * 4]);
EXPECT_EQ(0, argb_pixels_h709_i400[16 * 4]);
EXPECT_EQ(0, argb_pixels_2020_i400[16 * 4]);
EXPECT_EQ(130, argb_pixels_i400[128 * 4]);
EXPECT_EQ(128, argb_pixels_j400[128 * 4]);
EXPECT_EQ(128, argb_pixels_jpeg_i400[128 * 4]);
EXPECT_EQ(130, argb_pixels_h709_i400[128 * 4]);
EXPECT_EQ(130, argb_pixels_2020_i400[128 * 4]);
EXPECT_EQ(255, argb_pixels_i400[255 * 4]);
EXPECT_EQ(255, argb_pixels_j400[255 * 4]);
EXPECT_EQ(255, argb_pixels_jpeg_i400[255 * 4]);
EXPECT_EQ(255, argb_pixels_h709_i400[255 * 4]);
EXPECT_EQ(255, argb_pixels_2020_i400[255 * 4]);
for (int i = 0; i < kSize * 4; ++i) {
if ((i & 3) == 3) {
ASSERT_EQ(255, argb_pixels_j400[i]);
EXPECT_EQ(255, argb_pixels_j400[i]);
} else {
ASSERT_EQ(i / 4, argb_pixels_j400[i]);
EXPECT_EQ(i / 4, argb_pixels_j400[i]);
}
ASSERT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
EXPECT_EQ(argb_pixels_jpeg_i400[i], argb_pixels_j400[i]);
}
free_aligned_buffer_page_end(orig_i400);
@ -2678,7 +2671,7 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB24) {
ARGBToRGB24(argb_pixels, 0, dest_rgb24, 0, kSize, 1);
for (int i = 0; i < kSize * 3; ++i) {
ASSERT_EQ(orig_rgb24[i], dest_rgb24[i]);
EXPECT_EQ(orig_rgb24[i], dest_rgb24[i]);
}
free_aligned_buffer_page_end(orig_rgb24);
@ -2697,7 +2690,7 @@ TEST_F(LibYUVConvertTest, TestARGBToRGB565) {
}
ARGBToRGB565(&orig_pixels[0][0], 0, &dest_rgb565[0][0], 0, 256, 1);
uint32_t checksum = HashDjb2(&dest_rgb565[0][0], sizeof(dest_rgb565), 5381);
ASSERT_EQ(610919429u, checksum);
EXPECT_EQ(610919429u, checksum);
}
TEST_F(LibYUVConvertTest, TestYUY2ToARGB) {
@ -2712,9 +2705,9 @@ TEST_F(LibYUVConvertTest, TestYUY2ToARGB) {
YUY2ToARGB(&orig_pixels[0][0], 0, &dest_argb[0][0], 0, 256, 1);
uint32_t checksum = HashDjb2(&dest_argb[0][0], sizeof(dest_argb), 5381);
#if defined(LIBYUV_UNLIMITED_DATA)
ASSERT_EQ(10343289u, checksum);
EXPECT_EQ(10343289u, checksum);
#else
ASSERT_EQ(3486643515u, checksum);
EXPECT_EQ(3486643515u, checksum);
#endif
}
@ -2730,9 +2723,9 @@ TEST_F(LibYUVConvertTest, TestUYVYToARGB) {
UYVYToARGB(&orig_pixels[0][0], 0, &dest_argb[0][0], 0, 256, 1);
uint32_t checksum = HashDjb2(&dest_argb[0][0], sizeof(dest_argb), 5381);
#if defined(LIBYUV_UNLIMITED_DATA)
ASSERT_EQ(10343289u, checksum);
EXPECT_EQ(10343289u, checksum);
#else
ASSERT_EQ(3486643515u, checksum);
EXPECT_EQ(3486643515u, checksum);
#endif
}
@ -2810,9 +2803,9 @@ TEST_F(LibYUVConvertTest, TestARGBToUVRow) {
printf("\n");
uint32_t checksum_u = HashDjb2(&dest_u[0], sizeof(dest_u), 5381);
ASSERT_EQ(192508756u, checksum_u);
EXPECT_EQ(192508756u, checksum_u);
uint32_t checksum_v = HashDjb2(&dest_v[0], sizeof(dest_v), 5381);
ASSERT_EQ(2590663990u, checksum_v);
EXPECT_EQ(2590663990u, checksum_v);
}
#endif
@ -2838,23 +2831,16 @@ TEST_F(LibYUVConvertTest, TestARGBToUVMatrixRow_Opt) {
memset(dest_v_c, 0, sizeof(dest_v_c));
memset(dest_u_opt, 0, sizeof(dest_u_opt));
memset(dest_v_opt, 0, sizeof(dest_v_opt));
int src_stride = (height == 1) ? 0 : kMaxWidth * 4;
ARGBToUVMatrixRow_C(&orig_argb_pixels[0], src_stride, &dest_u_c[0],
&dest_v_c[0], width, &kArgbI601Constants);
ARGBToUVMatrixRow_Any_NEON(&orig_argb_pixels[0], src_stride,
&dest_u_opt[0], &dest_v_opt[0], width,
&kArgbI601Constants);
ARGBToUVMatrixRow_C(&orig_argb_pixels[0], src_stride, &dest_u_c[0], &dest_v_c[0], width, &kArgbI601Constants);
ARGBToUVMatrixRow_Any_NEON(&orig_argb_pixels[0], src_stride, &dest_u_opt[0], &dest_v_opt[0], width, &kArgbI601Constants);
int half_width = (width + 1) / 2;
for (int i = 0; i < half_width; ++i) {
ASSERT_EQ(dest_u_c[i], dest_u_opt[i])
<< "u mismatch at " << i << " width " << width << " height "
<< height;
ASSERT_EQ(dest_v_c[i], dest_v_opt[i])
<< "v mismatch at " << i << " width " << width << " height "
<< height;
EXPECT_EQ(dest_u_c[i], dest_u_opt[i]) << "u mismatch at " << i << " width " << width << " height " << height;
EXPECT_EQ(dest_v_c[i], dest_v_opt[i]) << "v mismatch at " << i << " width " << width << " height " << height;
}
}
}
@ -2867,7 +2853,6 @@ TEST_F(LibYUVConvertTest, TestARGBToUVMatrixRow_Opt) {
(defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__))
// TODO(fbarchard): Consider _set_new_mode(0) to make malloc return NULL
#ifndef DISABLE_SLOW_TESTS
TEST_F(LibYUVConvertTest, TestI400LargeSize) {
// The width and height are chosen as follows:
// - kWidth * kHeight is not a multiple of 8: This lets us use the Any
@ -2911,18 +2896,18 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
for (int i = 0; i < kWidth * kHeight; ++i) {
orig_i400[i] = i % 256;
}
ASSERT_EQ(I400ToARGBMatrix(orig_i400, kStride, dest_argb, kWidth,
EXPECT_EQ(I400ToARGBMatrix(orig_i400, kStride, dest_argb, kWidth,
&kYuvJPEGConstants, kWidth, kHeight),
0);
free_aligned_buffer_page_end(dest_argb);
free_aligned_buffer_page_end(orig_i400);
}
#endif // DISABLE_SLOW_TESTS
#endif // !defined(DISABLE_SLOW_TESTS) && \
// (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__))
#endif // !defined(LEAN_TESTS)
#define TESTATOBPI(FMT_A, TYPE_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, SUBSAMP_X, \
SUBSAMP_Y, W1280, N, NEG, OFF) \
TEST_F(LibYUVConvertTest, FMT_A##To##FMT_B##N) { \
@ -2935,17 +2920,17 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
const int kStrideUV = SUBSAMPLE(kWidth, SUBSAMP_X) * 2; \
const int kSizeUV = kStrideUV * SUBSAMPLE(kHeight, SUBSAMP_Y); \
align_buffer_page_end(src_argb, \
kStrideA * kHeightA * (int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_y_c, kStrideY * kHeight); \
kStrideA* kHeightA*(int)sizeof(TYPE_A) + OFF); \
align_buffer_page_end(dst_y_c, kStrideY* kHeight); \
align_buffer_page_end(dst_uv_c, kSizeUV); \
align_buffer_page_end(dst_y_opt, kStrideY * kHeight); \
align_buffer_page_end(dst_y_opt, kStrideY* kHeight); \
align_buffer_page_end(dst_uv_opt, kSizeUV); \
for (int i = 0; i < kStrideA * kHeightA * (int)sizeof(TYPE_A); ++i) { \
src_argb[i + OFF] = (fastrand() & 0xff); \
} \
memset(dst_y_c, 1, kStrideY * kHeight); \
memset(dst_y_c, 1, kStrideY* kHeight); \
memset(dst_uv_c, 2, kSizeUV); \
memset(dst_y_opt, 101, kStrideY * kHeight); \
memset(dst_y_opt, 101, kStrideY* kHeight); \
memset(dst_uv_opt, 102, kSizeUV); \
MaskCpuFlags(disable_cpu_flags_); \
FMT_A##To##FMT_B((TYPE_A*)(src_argb + OFF), kStrideA, dst_y_c, kStrideY, \
@ -2956,10 +2941,10 @@ TEST_F(LibYUVConvertTest, TestI400LargeSize) {
kStrideY, dst_uv_opt, kStrideUV, kWidth, NEG kHeight); \
} \
for (int i = 0; i < kStrideY * kHeight; ++i) { \
ASSERT_EQ(dst_y_c[i], dst_y_opt[i]); \
EXPECT_EQ(dst_y_c[i], dst_y_opt[i]); \
} \
for (int i = 0; i < kSizeUV; ++i) { \
ASSERT_EQ(dst_uv_c[i], dst_uv_opt[i]); \
EXPECT_EQ(dst_uv_c[i], dst_uv_opt[i]); \
} \
free_aligned_buffer_page_end(src_argb); \
free_aligned_buffer_page_end(dst_y_c); \

File diff suppressed because it is too large Load Diff

View File

@ -48,7 +48,7 @@ TEST_F(LibYUVBaseTest, TestCpuId) {
printf("Cpu Vendor: %s 0x%x 0x%x 0x%x\n",
reinterpret_cast<char*>(&cpu_info[0]), cpu_info[0], cpu_info[1],
cpu_info[2]);
ASSERT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
EXPECT_EQ(12u, strlen(reinterpret_cast<char*>(&cpu_info[0])));
// CPU Family and Model
// 3:0 - Stepping
@ -189,6 +189,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
int has_amxint8 = TestCpuFlag(kCpuHasAMXINT8);
int has_avx512bmm = TestCpuFlag(kCpuHasAVX512BMM);
printf("Has X86 0x%x\n", has_x86);
printf("Has SSE2 0x%x\n", has_sse2);
printf("Has SSSE3 0x%x\n", has_ssse3);
@ -211,6 +212,7 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
printf("Has AMXINT8 0x%x\n", has_amxint8);
printf("Has AVX512BMM 0x%x\n", has_avx512bmm);
}
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
// defined(_M_X64)
@ -327,8 +329,8 @@ TEST_F(LibYUVBaseTest, DISABLED_TestLinuxArm) {
if (FileExists("../../unit_test/testdata/arm_v7.txt")) {
printf("Note: testing to load \"../../unit_test/testdata/arm_v7.txt\"\n");
ASSERT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt"));
ASSERT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt"));
EXPECT_EQ(0, ArmCpuCaps("../../unit_test/testdata/arm_v7.txt"));
EXPECT_EQ(kCpuHasNEON, ArmCpuCaps("../../unit_test/testdata/tegra3.txt"));
} else {
printf("WARNING: unable to load \"../../unit_test/testdata/arm_v7.txt\"\n");
}
@ -347,23 +349,23 @@ TEST_F(LibYUVBaseTest, DISABLED_TestLinuxArm) {
#if defined(__linux__) && defined(__aarch64__)
TEST_F(LibYUVBaseTest, TestLinuxAArch64) {
// Values taken from a Cortex-A57 machine, only Neon available.
ASSERT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U));
EXPECT_EQ(kCpuHasNEON, AArch64CpuCaps(0xffU, 0x0U));
// Values taken from a Google Pixel 7.
int expected = kCpuHasNEON | kCpuHasNeonDotProd;
ASSERT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U));
EXPECT_EQ(expected, AArch64CpuCaps(0x119fffU, 0x0U));
// Values taken from a Google Pixel 8.
expected = kCpuHasNEON | kCpuHasNeonDotProd | kCpuHasNeonI8MM | kCpuHasSVE |
kCpuHasSVE2;
ASSERT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU));
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f33fU));
// Values taken from a Neoverse N2 machine.
ASSERT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU));
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x2f3ffU));
// Check for SME feature detection.
expected |= kCpuHasSME;
ASSERT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU));
EXPECT_EQ(expected, AArch64CpuCaps(0x3fffffffU, 0x82f3ffU));
// TODO: Check for SME2 feature detection from Apple M4
}
@ -373,10 +375,10 @@ TEST_F(LibYUVBaseTest, DISABLED_TestLinuxRVV) {
if (FileExists("../../unit_test/testdata/riscv64.txt")) {
printf("Note: testing to load \"../../unit_test/testdata/riscv64.txt\"\n");
ASSERT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt"));
ASSERT_EQ(kCpuHasRVV,
EXPECT_EQ(0, RiscvCpuCaps("../../unit_test/testdata/riscv64.txt"));
EXPECT_EQ(kCpuHasRVV,
RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv.txt"));
ASSERT_EQ(kCpuHasRVV | kCpuHasRVVZVFH,
EXPECT_EQ(kCpuHasRVV | kCpuHasRVVZVFH,
RiscvCpuCaps("../../unit_test/testdata/riscv64_rvv_zvfh.txt"));
} else {
printf(
@ -410,15 +412,15 @@ TEST_F(LibYUVBaseTest, MAYBE_TestSetCpuFlags) {
// Test setting different CPU configurations.
int cpu_flags = kCpuHasARM | kCpuHasNEON | kCpuInitialized;
SetCpuFlags(cpu_flags);
ASSERT_EQ(cpu_flags, TestCpuFlag(-1));
EXPECT_EQ(cpu_flags, TestCpuFlag(-1));
cpu_flags = kCpuHasX86 | kCpuInitialized;
SetCpuFlags(cpu_flags);
ASSERT_EQ(cpu_flags, TestCpuFlag(-1));
EXPECT_EQ(cpu_flags, TestCpuFlag(-1));
// Test that setting 0 turns auto-init back on.
SetCpuFlags(0);
ASSERT_EQ(original_cpu_flags, TestCpuFlag(-1));
EXPECT_EQ(original_cpu_flags, TestCpuFlag(-1));
// Restore the CPU flag mask.
MaskCpuFlags(benchmark_cpu_info_);

View File

@ -51,10 +51,10 @@ TEST(LibYUVCpuThreadTest, TestCpuFlagMultipleThreads) {
ret = pthread_create(&thread2, nullptr, ThreadMain, &cpu_flags2);
ASSERT_EQ(ret, 0);
ret = pthread_join(thread1, nullptr);
ASSERT_EQ(ret, 0);
EXPECT_EQ(ret, 0);
ret = pthread_join(thread2, nullptr);
ASSERT_EQ(ret, 0);
ASSERT_EQ(cpu_flags1, cpu_flags2);
EXPECT_EQ(ret, 0);
EXPECT_EQ(cpu_flags1, cpu_flags2);
#else
printf("pthread unavailable; Test skipped.");
#endif // LIBYUV_HAVE_PTHREAD

View File

@ -30,44 +30,44 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) {
int result_opt[1280];
int result_c[1280];
ASSERT_EQ(0x10000, libyuv::FixedDiv(1, 1));
ASSERT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1));
EXPECT_EQ(0x10000, libyuv::FixedDiv(1, 1));
EXPECT_EQ(0x7fff0000, libyuv::FixedDiv(0x7fff, 1));
// TODO(fbarchard): Avoid the following that throw exceptions.
// ASSERT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1));
// ASSERT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1));
// EXPECT_EQ(0x100000000, libyuv::FixedDiv(0x10000, 1));
// EXPECT_EQ(0x80000000, libyuv::FixedDiv(0x8000, 1));
ASSERT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640));
ASSERT_EQ(0x30000, libyuv::FixedDiv(640 * 3, 640));
ASSERT_EQ(0x40000, libyuv::FixedDiv(640 * 4, 640));
ASSERT_EQ(0x50000, libyuv::FixedDiv(640 * 5, 640));
ASSERT_EQ(0x60000, libyuv::FixedDiv(640 * 6, 640));
ASSERT_EQ(0x70000, libyuv::FixedDiv(640 * 7, 640));
ASSERT_EQ(0x80000, libyuv::FixedDiv(640 * 8, 640));
ASSERT_EQ(0xa0000, libyuv::FixedDiv(640 * 10, 640));
ASSERT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960));
ASSERT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640));
ASSERT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640));
ASSERT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080));
ASSERT_EQ(0x20000, libyuv::FixedDiv(200000, 100000));
ASSERT_EQ(0x18000, libyuv::FixedDiv(150000, 100000));
ASSERT_EQ(0x20000, libyuv::FixedDiv(40000, 20000));
ASSERT_EQ(0x20000, libyuv::FixedDiv(-40000, -20000));
ASSERT_EQ(-0x20000, libyuv::FixedDiv(40000, -20000));
ASSERT_EQ(-0x20000, libyuv::FixedDiv(-40000, 20000));
ASSERT_EQ(0x10000, libyuv::FixedDiv(4095, 4095));
ASSERT_EQ(0x10000, libyuv::FixedDiv(4096, 4096));
ASSERT_EQ(0x10000, libyuv::FixedDiv(4097, 4097));
ASSERT_EQ(123 * 65536, libyuv::FixedDiv(123, 1));
EXPECT_EQ(0x20000, libyuv::FixedDiv(640 * 2, 640));
EXPECT_EQ(0x30000, libyuv::FixedDiv(640 * 3, 640));
EXPECT_EQ(0x40000, libyuv::FixedDiv(640 * 4, 640));
EXPECT_EQ(0x50000, libyuv::FixedDiv(640 * 5, 640));
EXPECT_EQ(0x60000, libyuv::FixedDiv(640 * 6, 640));
EXPECT_EQ(0x70000, libyuv::FixedDiv(640 * 7, 640));
EXPECT_EQ(0x80000, libyuv::FixedDiv(640 * 8, 640));
EXPECT_EQ(0xa0000, libyuv::FixedDiv(640 * 10, 640));
EXPECT_EQ(0x20000, libyuv::FixedDiv(960 * 2, 960));
EXPECT_EQ(0x08000, libyuv::FixedDiv(640 / 2, 640));
EXPECT_EQ(0x04000, libyuv::FixedDiv(640 / 4, 640));
EXPECT_EQ(0x20000, libyuv::FixedDiv(1080 * 2, 1080));
EXPECT_EQ(0x20000, libyuv::FixedDiv(200000, 100000));
EXPECT_EQ(0x18000, libyuv::FixedDiv(150000, 100000));
EXPECT_EQ(0x20000, libyuv::FixedDiv(40000, 20000));
EXPECT_EQ(0x20000, libyuv::FixedDiv(-40000, -20000));
EXPECT_EQ(-0x20000, libyuv::FixedDiv(40000, -20000));
EXPECT_EQ(-0x20000, libyuv::FixedDiv(-40000, 20000));
EXPECT_EQ(0x10000, libyuv::FixedDiv(4095, 4095));
EXPECT_EQ(0x10000, libyuv::FixedDiv(4096, 4096));
EXPECT_EQ(0x10000, libyuv::FixedDiv(4097, 4097));
EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1));
for (int i = 1; i < 4100; ++i) {
ASSERT_EQ(0x10000, libyuv::FixedDiv(i, i));
ASSERT_EQ(0x20000, libyuv::FixedDiv(i * 2, i));
ASSERT_EQ(0x30000, libyuv::FixedDiv(i * 3, i));
ASSERT_EQ(0x40000, libyuv::FixedDiv(i * 4, i));
ASSERT_EQ(0x08000, libyuv::FixedDiv(i, i * 2));
ASSERT_NEAR(16384 * 65536 / i, libyuv::FixedDiv(16384, i), 1);
EXPECT_EQ(0x10000, libyuv::FixedDiv(i, i));
EXPECT_EQ(0x20000, libyuv::FixedDiv(i * 2, i));
EXPECT_EQ(0x30000, libyuv::FixedDiv(i * 3, i));
EXPECT_EQ(0x40000, libyuv::FixedDiv(i * 4, i));
EXPECT_EQ(0x08000, libyuv::FixedDiv(i, i * 2));
EXPECT_NEAR(16384 * 65536 / i, libyuv::FixedDiv(16384, i), 1);
}
ASSERT_EQ(123 * 65536, libyuv::FixedDiv(123, 1));
EXPECT_EQ(123 * 65536, libyuv::FixedDiv(123, 1));
MemRandomize(reinterpret_cast<uint8_t*>(&num[0]), sizeof(num));
MemRandomize(reinterpret_cast<uint8_t*>(&div[0]), sizeof(div));
@ -84,7 +84,7 @@ TEST_F(LibYUVBaseTest, TestFixedDiv) {
}
for (int j = 0; j < 1280; ++j) {
result_c[j] = libyuv::FixedDiv_C(num[j], div[j]);
ASSERT_NEAR(result_c[j], result_opt[j], 1);
EXPECT_NEAR(result_c[j], result_opt[j], 1);
}
}
@ -118,7 +118,7 @@ TEST_F(LibYUVBaseTest, TestFixedDiv_Opt) {
}
for (int j = 0; j < 1280; ++j) {
result_c[j] = libyuv::FixedDiv_C(num[j], div[j]);
ASSERT_NEAR(result_c[j], result_opt[j], 1);
EXPECT_NEAR(result_c[j], result_opt[j], 1);
}
}
@ -152,7 +152,7 @@ TEST_F(LibYUVBaseTest, TestFixedDiv1_Opt) {
}
for (int j = 0; j < 1280; ++j) {
result_c[j] = libyuv::FixedDiv1_C(num[j], div[j]);
ASSERT_NEAR(result_c[j], result_opt[j], 1);
EXPECT_NEAR(result_c[j], result_opt[j], 1);
}
}
#endif // ENABLE_ROW_TESTS

File diff suppressed because it is too large Load Diff

View File

@ -75,7 +75,7 @@ static void TestRotateBpp(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_argb_plane_size; ++i) {
ASSERT_EQ(dst_argb_c[i], dst_argb_opt[i]);
EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);
}
free_aligned_buffer_page_end(dst_argb_c);
@ -189,35 +189,35 @@ TEST_F(LibYUVRotateTest, RotatePlane90_TestStride) {
align_buffer_page_end(src_argb, argb_plane_size);
align_buffer_page_end(dst_argb, argb_plane_size);
ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
benchmark_width_ * 4, benchmark_width_,
benchmark_height_, kRotate0));
ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
benchmark_width_ * 4 - 1, benchmark_width_ - 1,
benchmark_height_, kRotate0));
ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
benchmark_width_ * 4, benchmark_width_,
benchmark_height_, kRotate180));
ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
benchmark_width_ * 4 - 1, benchmark_width_ - 1,
benchmark_height_, kRotate180));
ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
abs(benchmark_height_) * 4, benchmark_width_,
benchmark_height_, kRotate90));
ASSERT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
abs(benchmark_height_) * 4, benchmark_width_ - 1,
benchmark_height_, kRotate90));
ASSERT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
EXPECT_EQ(0, ARGBRotate(src_argb, benchmark_width_ * 4, dst_argb,
abs(benchmark_height_) * 4, benchmark_width_,
benchmark_height_, kRotate270));
ASSERT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
EXPECT_EQ(-1, ARGBRotate(src_argb, benchmark_width_ * 4 - 1, dst_argb,
abs(benchmark_height_) * 4, benchmark_width_ - 1,
benchmark_height_, kRotate270));
@ -271,7 +271,7 @@ static void TestRotatePlane_16(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_plane_size; ++i) {
ASSERT_EQ(dst_c[i], dst_opt[i]);
EXPECT_EQ(dst_c[i], dst_opt[i]);
}
free_aligned_buffer_page_end_16(dst_c);

View File

@ -20,7 +20,7 @@
namespace libyuv {
#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
#define SUBSAMPLE(v, a) ((((v) + (a)-1)) / (a))
static void I420TestRotate(int src_width,
int src_height,
@ -78,7 +78,7 @@ static void I420TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i420_size; ++i) {
ASSERT_EQ(dst_i420_c[i], dst_i420_opt[i]);
EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]);
}
free_aligned_buffer_page_end(dst_i420_c);
@ -197,7 +197,7 @@ static void I422TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i422_size; ++i) {
ASSERT_EQ(dst_i422_c[i], dst_i422_opt[i]);
EXPECT_EQ(dst_i422_c[i], dst_i422_opt[i]);
}
free_aligned_buffer_page_end(dst_i422_c);
@ -283,7 +283,7 @@ static void I444TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i444_size; ++i) {
ASSERT_EQ(dst_i444_c[i], dst_i444_opt[i]);
EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
}
free_aligned_buffer_page_end(dst_i444_c);
@ -401,7 +401,7 @@ static void NV12TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i420_size; ++i) {
ASSERT_EQ(dst_i420_c[i], dst_i420_opt[i]);
EXPECT_EQ(dst_i420_c[i], dst_i420_opt[i]);
}
free_aligned_buffer_page_end(dst_i420_c);
@ -495,15 +495,15 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
const int kHeight = benchmark_height_; \
const int kSizeUV = \
SUBSAMPLE(kWidth, SRC_SUBSAMP_X) * SUBSAMPLE(kHeight, SRC_SUBSAMP_Y); \
align_buffer_page_end(src_y, kWidth * kHeight + OFF); \
align_buffer_page_end(src_y, kWidth* kHeight + OFF); \
align_buffer_page_end(src_uv, \
kSizeUV * ((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
align_buffer_page_end(dst_y_c, kWidth * kHeight); \
kSizeUV*((PIXEL_STRIDE == 3) ? 3 : 2) + OFF); \
align_buffer_page_end(dst_y_c, kWidth* kHeight); \
align_buffer_page_end(dst_u_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_c, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_y_opt, kWidth * kHeight); \
align_buffer_page_end(dst_y_opt, kWidth* kHeight); \
align_buffer_page_end(dst_u_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
SUBSAMPLE(kHeight, SUBSAMP_Y)); \
align_buffer_page_end(dst_v_opt, SUBSAMPLE(kWidth, SUBSAMP_X) * \
@ -522,12 +522,12 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
(fastrand() & 0xff); \
} \
} \
memset(dst_y_c, 1, kWidth * kHeight); \
memset(dst_y_c, 1, kWidth* kHeight); \
memset(dst_u_c, 2, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_c, 3, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_y_opt, 101, kWidth * kHeight); \
memset(dst_y_opt, 101, kWidth* kHeight); \
memset(dst_u_opt, 102, \
SUBSAMPLE(kWidth, SUBSAMP_X) * SUBSAMPLE(kHeight, SUBSAMP_Y)); \
memset(dst_v_opt, 103, \
@ -550,18 +550,18 @@ TEST_F(LibYUVRotateTest, NV12Rotate270_Invert) {
} \
for (int i = 0; i < kHeight; ++i) { \
for (int j = 0; j < kWidth; ++j) { \
ASSERT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
EXPECT_EQ(dst_y_c[i * kWidth + j], dst_y_opt[i * kWidth + j]); \
} \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
ASSERT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
EXPECT_EQ(dst_u_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
dst_u_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
} \
} \
for (int i = 0; i < SUBSAMPLE(kHeight, SUBSAMP_Y); ++i) { \
for (int j = 0; j < SUBSAMPLE(kWidth, SUBSAMP_X); ++j) { \
ASSERT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
EXPECT_EQ(dst_v_c[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j], \
dst_v_opt[i * SUBSAMPLE(kWidth, SUBSAMP_X) + j]); \
} \
} \
@ -656,7 +656,7 @@ static void I010TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i010_size; ++i) {
ASSERT_EQ(dst_i010_c[i], dst_i010_opt[i]);
EXPECT_EQ(dst_i010_c[i], dst_i010_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i010_c);
@ -744,7 +744,7 @@ static void I210TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i210_size; ++i) {
ASSERT_EQ(dst_i210_c[i], dst_i210_opt[i]);
EXPECT_EQ(dst_i210_c[i], dst_i210_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i210_c);
@ -830,7 +830,7 @@ static void I410TestRotate(int src_width,
// Rotation should be exact.
for (int i = 0; i < dst_i410_size; ++i) {
ASSERT_EQ(dst_i410_c[i], dst_i410_opt[i]);
EXPECT_EQ(dst_i410_c[i], dst_i410_opt[i]);
}
free_aligned_buffer_page_end_16(dst_i410_c);
@ -906,8 +906,8 @@ TEST_F(LibYUVRotateTest, Transpose4x4_Test) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
ASSERT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);
ASSERT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);
EXPECT_EQ(dst_pixels_c[i][j], src_pixels[j][i]);
EXPECT_EQ(dst_pixels_c[i][j], dst_pixels_opt[i][j]);
}
}
}
@ -949,7 +949,7 @@ TEST_F(LibYUVRotateTest, Transpose4x4_Opt) {
}
for (int i = 0; i < width * height; ++i) {
ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
free_aligned_buffer_page_end(src_pixels);

View File

@ -245,14 +245,14 @@ static int ARGBClipTestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##ARGBScaleDownClipBy##name##_##filter) { \
int diff = ARGBClipTestFilter( \
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
@ -294,28 +294,28 @@ TEST_FACTOR(3, 1, 3)
int diff = ARGBTestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
int diff = ARGBTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##name##ClipTo##width##x##height##_##filter) { \
int diff = \
ARGBClipTestFilter(benchmark_width_, benchmark_height_, width, height, \
kFilter##filter, benchmark_iterations_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##name##ClipFrom##width##x##height##_##filter) { \
int diff = ARGBClipTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#ifndef DISABLE_SLOW_TESTS
@ -357,7 +357,7 @@ TEST_SCALETO(ARGBScale, 1920, 1080)
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -430,14 +430,12 @@ static void FillRamp(uint8_t* buf,
}
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
static void YUVToARGBTestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations,
int error_threshold,
int* max_diff_out) {
static int YUVToARGBTestFilter(int src_width,
int src_height,
int dst_width,
int dst_height,
FilterMode f,
int benchmark_iterations) {
int64_t src_y_plane_size = Abs(src_width) * Abs(src_height);
int64_t src_uv_plane_size =
((Abs(src_width) + 1) / 2) * ((Abs(src_height) + 1) / 2);
@ -448,13 +446,13 @@ static void YUVToARGBTestFilter(int src_width,
align_buffer_page_end(src_u, src_uv_plane_size);
align_buffer_page_end(src_v, src_uv_plane_size);
int64_t dst_argb_plane_size = (dst_width) * (dst_height) * 4LL;
int dst_stride_argb = (dst_width) * 4;
int64_t dst_argb_plane_size = (dst_width) * (dst_height)*4LL;
int dst_stride_argb = (dst_width)*4;
align_buffer_page_end(dst_argb_c, dst_argb_plane_size);
align_buffer_page_end(dst_argb_opt, dst_argb_plane_size);
if (!dst_argb_c || !dst_argb_opt || !src_y || !src_u || !src_v) {
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
return;
return 0;
}
// Fill YUV image with continuous ramp, which is less sensitive to
// subsampling and filtering differences for test purposes.
@ -483,44 +481,36 @@ static void YUVToARGBTestFilter(int src_width,
int abs_diff = Abs(dst_argb_c[(i * dst_stride_argb) + j] -
dst_argb_opt[(i * dst_stride_argb) + j]);
if (abs_diff > max_diff) {
max_diff = abs_diff;
}
if (abs_diff > error_threshold) {
printf("error %d at %d,%d c %d opt %d\n", abs_diff, j, i,
printf("error %d at %d,%d c %d opt %d", abs_diff, j, i,
dst_argb_c[(i * dst_stride_argb) + j],
dst_argb_opt[(i * dst_stride_argb) + j]);
goto cleanup;
EXPECT_LE(abs_diff, 40);
max_diff = abs_diff;
}
}
}
cleanup:
if (max_diff_out) {
*max_diff_out = max_diff;
}
free_aligned_buffer_page_end(dst_argb_c);
free_aligned_buffer_page_end(dst_argb_opt);
free_aligned_buffer_page_end(src_y);
free_aligned_buffer_page_end(src_u);
free_aligned_buffer_page_end(src_v);
return max_diff;
}
TEST_F(LibYUVScaleTest, YUVToRGBScaleUp) {
int diff = 0;
YUVToARGBTestFilter(benchmark_width_, benchmark_height_,
benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2,
libyuv::kFilterBilinear, benchmark_iterations_, 10,
&diff);
ASSERT_LE(diff, 10);
int diff =
YUVToARGBTestFilter(benchmark_width_, benchmark_height_,
benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2,
libyuv::kFilterBilinear, benchmark_iterations_);
EXPECT_LE(diff, 10);
}
TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
int diff = 0;
YUVToARGBTestFilter(benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2,
benchmark_width_, benchmark_height_,
libyuv::kFilterBilinear, benchmark_iterations_, 10,
&diff);
ASSERT_LE(diff, 10);
int diff = YUVToARGBTestFilter(
benchmark_width_ * 3 / 2, benchmark_height_ * 3 / 2, benchmark_width_,
benchmark_height_, libyuv::kFilterBilinear, benchmark_iterations_);
EXPECT_LE(diff, 10);
}
TEST_F(LibYUVScaleTest, ARGBTest3x) {
@ -543,18 +533,18 @@ TEST_F(LibYUVScaleTest, ARGBTest3x) {
kFilterBilinear);
}
ASSERT_EQ(225, dest_pixels[0]);
ASSERT_EQ(255 - 225, dest_pixels[1]);
ASSERT_EQ(226, dest_pixels[2]);
ASSERT_EQ(235, dest_pixels[3]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(226, dest_pixels[2]);
EXPECT_EQ(235, dest_pixels[3]);
ARGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
ASSERT_EQ(225, dest_pixels[0]);
ASSERT_EQ(255 - 225, dest_pixels[1]);
ASSERT_EQ(226, dest_pixels[2]);
ASSERT_EQ(235, dest_pixels[3]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(226, dest_pixels[2]);
EXPECT_EQ(235, dest_pixels[3]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -580,18 +570,18 @@ TEST_F(LibYUVScaleTest, ARGBTest4x) {
kFilterBilinear);
}
ASSERT_NEAR(66, dest_pixels[0], 4);
ASSERT_NEAR(255 - 66, dest_pixels[1], 4);
ASSERT_NEAR(67, dest_pixels[2], 4);
ASSERT_NEAR(76, dest_pixels[3], 4);
EXPECT_NEAR(66, dest_pixels[0], 4);
EXPECT_NEAR(255 - 66, dest_pixels[1], 4);
EXPECT_NEAR(67, dest_pixels[2], 4);
EXPECT_NEAR(76, dest_pixels[3], 4);
ARGBScale(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterNone);
ASSERT_EQ(2, dest_pixels[0]);
ASSERT_EQ(255 - 2, dest_pixels[1]);
ASSERT_EQ(3, dest_pixels[2]);
ASSERT_EQ(12, dest_pixels[3]);
EXPECT_EQ(2, dest_pixels[0]);
EXPECT_EQ(255 - 2, dest_pixels[1]);
EXPECT_EQ(3, dest_pixels[2]);
EXPECT_EQ(12, dest_pixels[3]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);

View File

@ -42,108 +42,6 @@
namespace libyuv {
// POC: int row_stride = src_stride * 2 overflows to a small negative value
// when src_stride is close to INT_MAX, causing src_ptr to walk backward
// past the start of the source allocation on the second loop iteration.
// With src_stride = 0x7FFFFFFE, row_stride = (int)0xFFFFFFFC = -4, so on
// y=1 ScaleRowDown2Box reads 4 bytes before the heap allocation.
//
// The test contains no EXPECT/ASSERT and ignores the value returned by
// ScalePlane: it only drives a 64x4 -> 32x2 box-filtered downscale with the
// huge stride so that a sanitizer build can report an out-of-bounds read.
// It is skipped whenever the source buffer size cannot be represented or the
// buffer cannot be allocated.
TEST_F(LibYUVScaleTest, ScalePlaneDown2_RowStrideOverflow) {
constexpr int kSrcStride = 0x7FFFFFFE; // INT_MAX - 1
constexpr int kSrcW = 64;
constexpr int kSrcH = 4;
constexpr int kDstW = 32;
constexpr int kDstH = 2;
// src_size = (kSrcH - 1) * stride + width.
// Built up one step at a time; each step is range-checked against SIZE_MAX
// before it is applied so the size_t arithmetic itself never wraps.
size_t src_size = kSrcH - 1;
if (src_size > SIZE_MAX / kSrcStride) {
GTEST_SKIP() << "could not represent allocation size in size_t";
}
src_size *= kSrcStride;
if (src_size > SIZE_MAX - kSrcW) {
GTEST_SKIP() << "could not represent allocation size in size_t";
}
src_size += kSrcW;
#if defined(__aarch64__)
// Infer malloc can accept a large size for cpu with dot product (a76/a55)
int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd);
#else
int has_large_malloc = 1;
#endif
if (!has_large_malloc) {
GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes";
}
// nothrow new: an allocation failure yields nullptr and the test is skipped
// instead of throwing.
uint8_t* src = new (std::nothrow) uint8_t[src_size];
if (!src) {
GTEST_SKIP() << "could not allocate " << src_size << " bytes";
}
// Destination is a small stack buffer: kDstH rows of kDstW bytes, with
// kDstW also passed as the destination stride below.
uint8_t dst[kDstW * kDstH];
// Only the first kSrcW bytes of each of the kSrcH rows are filled; the gaps
// between rows are never written.
uint8_t* src_row = src;
for (int i = 0; i < kSrcH; i++) {
memset(src_row, 0x41, kSrcW);
src_row += kSrcStride;
}
// Force the C row kernel: the SIMD kernels are inline asm that ASAN does not
// instrument, so they silently read OOB without a report.
MaskCpuFlags(1);
// 2*dst == src on both axes -> ScalePlane dispatches to ScalePlaneDown2.
// int row_stride = kSrcStride * 2 wraps to -4; on y=1 src_ptr underflows.
ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH,
kFilterBox);
// NOTE(review): presumably a mask of 0 returns CPU detection to its
// automatic state for later tests -- confirm against cpu_id.h.
MaskCpuFlags(0);
delete[] src;
}
// POC: same defect in the 1/4 fast path. src_stride = 0x3FFFFFFF gives
// int row_stride = src_stride * 4 = (int)0xFFFFFFFC = -4.
//
// As with the 1/2 variant, the test makes no assertions and ignores the value
// returned by ScalePlane; it only drives a 64x8 -> 16x2 box-filtered
// downscale with the huge stride so that a sanitizer build can report an
// out-of-bounds read. It is skipped whenever the source buffer size cannot be
// represented or the buffer cannot be allocated.
TEST_F(LibYUVScaleTest, ScalePlaneDown4_RowStrideOverflow) {
constexpr int kSrcStride = 0x3FFFFFFF; // INT_MAX / 4 (rounded down)
constexpr int kSrcW = 64;
constexpr int kSrcH = 8;
constexpr int kDstW = 16;
constexpr int kDstH = 2;
// src_size = (kSrcH - 1) * stride + width.
// Built up one step at a time; each step is range-checked against SIZE_MAX
// before it is applied so the size_t arithmetic itself never wraps.
size_t src_size = kSrcH - 1;
if (src_size > SIZE_MAX / kSrcStride) {
GTEST_SKIP() << "could not represent allocation size in size_t";
}
src_size *= kSrcStride;
if (src_size > SIZE_MAX - kSrcW) {
GTEST_SKIP() << "could not represent allocation size in size_t";
}
src_size += kSrcW;
#if defined(__aarch64__)
// Infer malloc can accept a large size for cpu with dot product (a76/a55)
int has_large_malloc = TestCpuFlag(kCpuHasNeonDotProd);
#else
int has_large_malloc = 1;
#endif
if (!has_large_malloc) {
GTEST_SKIP() << "large allocation may assert for " << src_size << " bytes";
}
// nothrow new: an allocation failure yields nullptr and the test is skipped
// instead of throwing.
uint8_t* src = new (std::nothrow) uint8_t[src_size];
if (!src) {
GTEST_SKIP() << "could not allocate " << src_size << " bytes";
}
// Destination is a small stack buffer: kDstH rows of kDstW bytes, with
// kDstW also passed as the destination stride below.
uint8_t dst[kDstW * kDstH];
// Only the first kSrcW bytes of each of the kSrcH rows are filled; the gaps
// between rows are never written.
uint8_t* src_row = src;
for (int i = 0; i < kSrcH; i++) {
memset(src_row, 0x41, kSrcW);
src_row += kSrcStride;
}
// Force the C row kernel: the SIMD kernels are inline asm that ASAN does not
// instrument, so they silently read OOB without a report.
MaskCpuFlags(1);
// 4*dst == src on both axes with kFilterBox -> ScalePlaneDown4.
ScalePlane(src, kSrcStride, kSrcW, kSrcH, dst, kDstW, kDstW, kDstH,
kFilterBox);
// NOTE(review): presumably a mask of 0 returns CPU detection to its
// automatic state for later tests -- confirm against cpu_id.h.
MaskCpuFlags(0);
delete[] src;
}
#ifdef ENABLE_ROW_TESTS
#ifdef HAS_SCALEROWDOWN2_SSSE3
TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
@ -187,49 +85,49 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
// Test regular half size.
ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
ASSERT_EQ(64u, dst_pixels_c[0]);
ASSERT_EQ(25u, dst_pixels_c[1]);
ASSERT_EQ(13u, dst_pixels_c[2]);
ASSERT_EQ(5u, dst_pixels_c[3]);
ASSERT_EQ(0u, dst_pixels_c[4]);
ASSERT_EQ(133u, dst_pixels_c[63]);
EXPECT_EQ(64u, dst_pixels_c[0]);
EXPECT_EQ(25u, dst_pixels_c[1]);
EXPECT_EQ(13u, dst_pixels_c[2]);
EXPECT_EQ(5u, dst_pixels_c[3]);
EXPECT_EQ(0u, dst_pixels_c[4]);
EXPECT_EQ(133u, dst_pixels_c[63]);
// Test Odd width version - Last pixel is just 1 horizontal pixel.
ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
ASSERT_EQ(64u, dst_pixels_c[0]);
ASSERT_EQ(25u, dst_pixels_c[1]);
ASSERT_EQ(13u, dst_pixels_c[2]);
ASSERT_EQ(5u, dst_pixels_c[3]);
ASSERT_EQ(0u, dst_pixels_c[4]);
ASSERT_EQ(10u, dst_pixels_c[63]);
EXPECT_EQ(64u, dst_pixels_c[0]);
EXPECT_EQ(25u, dst_pixels_c[1]);
EXPECT_EQ(13u, dst_pixels_c[2]);
EXPECT_EQ(5u, dst_pixels_c[3]);
EXPECT_EQ(0u, dst_pixels_c[4]);
EXPECT_EQ(10u, dst_pixels_c[63]);
// Test one pixel less, should skip the last pixel.
memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
ASSERT_EQ(64u, dst_pixels_c[0]);
ASSERT_EQ(25u, dst_pixels_c[1]);
ASSERT_EQ(13u, dst_pixels_c[2]);
ASSERT_EQ(5u, dst_pixels_c[3]);
ASSERT_EQ(0u, dst_pixels_c[4]);
ASSERT_EQ(0u, dst_pixels_c[63]);
EXPECT_EQ(64u, dst_pixels_c[0]);
EXPECT_EQ(25u, dst_pixels_c[1]);
EXPECT_EQ(13u, dst_pixels_c[2]);
EXPECT_EQ(5u, dst_pixels_c[3]);
EXPECT_EQ(0u, dst_pixels_c[4]);
EXPECT_EQ(0u, dst_pixels_c[63]);
// Test regular half size SSSE3.
ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
ASSERT_EQ(64u, dst_pixels_opt[0]);
ASSERT_EQ(25u, dst_pixels_opt[1]);
ASSERT_EQ(13u, dst_pixels_opt[2]);
ASSERT_EQ(5u, dst_pixels_opt[3]);
ASSERT_EQ(0u, dst_pixels_opt[4]);
ASSERT_EQ(133u, dst_pixels_opt[63]);
EXPECT_EQ(64u, dst_pixels_opt[0]);
EXPECT_EQ(25u, dst_pixels_opt[1]);
EXPECT_EQ(13u, dst_pixels_opt[2]);
EXPECT_EQ(5u, dst_pixels_opt[3]);
EXPECT_EQ(0u, dst_pixels_opt[4]);
EXPECT_EQ(133u, dst_pixels_opt[63]);
// Compare C and SSSE3 match.
ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
for (int i = 0; i < 64; ++i) {
ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
}
}
@ -262,11 +160,11 @@ TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
}
for (int i = 0; i < 1280; ++i) {
ASSERT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
ASSERT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
ASSERT_EQ(dst_pixels_c[1279], 3839);
EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
EXPECT_EQ(dst_pixels_c[1279], 3839);
}
#endif // ENABLE_ROW_TESTS
@ -346,7 +244,7 @@ static int TestPlaneFilter_16(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
@ -385,12 +283,12 @@ TEST_F(LibYUVScaleTest, PlaneTest3x) {
kFilterBilinear);
}
ASSERT_EQ(225, dest_pixels[0]);
EXPECT_EQ(225, dest_pixels[0]);
ScalePlane(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
ASSERT_EQ(225, dest_pixels[0]);
EXPECT_EQ(225, dest_pixels[0]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -413,12 +311,12 @@ TEST_F(LibYUVScaleTest, PlaneTest4x) {
kFilterBilinear);
}
ASSERT_EQ(66, dest_pixels[0]);
EXPECT_EQ(66, dest_pixels[0]);
ScalePlane(orig_pixels, kSrcStride, 640, 4, dest_pixels, kDstStride, 160, 1,
kFilterNone);
ASSERT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -447,7 +345,7 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_None) {
}
for (int i = 0; i < kSize; ++i) {
ASSERT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
}
free_aligned_buffer_page_end(dest_c_pixels);
@ -477,7 +375,7 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Bilinear) {
}
for (int i = 0; i < kSize; ++i) {
ASSERT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
}
free_aligned_buffer_page_end(dest_c_pixels);
@ -508,7 +406,7 @@ TEST_F(LibYUVScaleTest, PlaneTestRotate_Box) {
}
for (int i = 0; i < kSize; ++i) {
ASSERT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
EXPECT_EQ(dest_c_pixels[i], dest_opt_pixels[i]);
}
free_aligned_buffer_page_end(dest_c_pixels);
@ -534,9 +432,9 @@ TEST_F(LibYUVScaleTest, PlaneTest1_Box) {
/* dst_width= */ 1, /* dst_height= */ 2,
libyuv::kFilterBox);
ASSERT_EQ(dst_pixels[0], 1);
ASSERT_EQ(dst_pixels[1], 1);
ASSERT_EQ(dst_pixels[2], 3);
EXPECT_EQ(dst_pixels[0], 1);
EXPECT_EQ(dst_pixels[1], 1);
EXPECT_EQ(dst_pixels[2], 3);
free_aligned_buffer_page_end(dst_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -562,9 +460,9 @@ TEST_F(LibYUVScaleTest, PlaneTest1_16_Box) {
/* src_height= */ 1, dst_pixels, /* dst_stride= */ 1,
/* dst_width= */ 1, /* dst_height= */ 2, libyuv::kFilterNone);
ASSERT_EQ(dst_pixels[0], 1);
ASSERT_EQ(dst_pixels[1], 1);
ASSERT_EQ(dst_pixels[2], 3);
EXPECT_EQ(dst_pixels[0], 1);
EXPECT_EQ(dst_pixels[1], 1);
EXPECT_EQ(dst_pixels[2], 3);
free_aligned_buffer_page_end(dst_pixels_alloc);
free_aligned_buffer_page_end(orig_pixels_alloc);
@ -631,58 +529,9 @@ TEST_F(LibYUVScaleTest, ScalePlaneVertical_IntStrideOverflow) {
kDstHeight, kFilterNone);
// Not reached under ASAN.
ASSERT_EQ(0, r);
EXPECT_EQ(0, r);
delete[] src;
delete[] dst;
}
// Checks ScalePlane's argument validation: each rejected call is expected to
// return -1 and the two minimal 1x1 calls are expected to return 0.
// Arguments are passed in the order (src, src_stride, src_width, src_height,
// dst, dst_stride, dst_width, dst_height, filter); every call below varies
// exactly one of them from the baseline 4x4 -> 4x4 case.
TEST_F(LibYUVScaleTest, ScalePlane_InvalidInputs) {
uint8_t src[16] = {0};
uint8_t dst[16] = {0};
// NULL src/dst
EXPECT_EQ(-1, ScalePlane(nullptr, 4, 4, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, nullptr, 4, 4, 4, kFilterNone));
// Width/height <= 0 (except src_height which can be negative but not 0)
// In order: src_width 0 and -1, src_height 0, dst_width 0 and -1,
// dst_height 0 and -1.
EXPECT_EQ(-1, ScalePlane(src, 4, 0, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, -1, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 0, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, 0, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, -1, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, 4, 0, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 4, dst, 4, 4, -1, kFilterNone));
// Width/height too large (> 32768)
// Only the source dimensions are probed here, including a negative
// src_height whose magnitude exceeds the limit.
EXPECT_EQ(-1, ScalePlane(src, 4, 32769, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, 32769, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane(src, 4, 4, -32769, dst, 4, 4, 4, kFilterNone));
// Valid edge cases
// A 1x1 source scaled to 1x1, once with src_height 1 and once with -1.
EXPECT_EQ(0, ScalePlane(src, 4, 1, 1, dst, 4, 1, 1, kFilterNone));
EXPECT_EQ(0, ScalePlane(src, 4, 1, -1, dst, 4, 1, 1, kFilterNone));
}
// Checks that ScalePlane_16 returns -1 for a null source, a null destination,
// a zero src_width, an oversized src_width (32769) and an oversized negative
// src_height (-32769). The call shape matches the 8-bit test in this file but
// uses uint16_t buffers; no successful call is exercised here.
TEST_F(LibYUVScaleTest, ScalePlane_16_InvalidInputs) {
uint16_t src[16] = {0};
uint16_t dst[16] = {0};
EXPECT_EQ(-1, ScalePlane_16(nullptr, 4, 4, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_16(src, 4, 4, 4, nullptr, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_16(src, 4, 0, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_16(src, 4, 32769, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_16(src, 4, 4, -32769, dst, 4, 4, 4, kFilterNone));
}
// Checks that ScalePlane_12 returns -1 for a null source, a null destination,
// a zero src_width, an oversized src_width (32769) and an oversized negative
// src_height (-32769). Uses uint16_t buffers; no successful call is exercised
// here.
TEST_F(LibYUVScaleTest, ScalePlane_12_InvalidInputs) {
uint16_t src[16] = {0};
uint16_t dst[16] = {0};
EXPECT_EQ(-1, ScalePlane_12(nullptr, 4, 4, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_12(src, 4, 4, 4, nullptr, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_12(src, 4, 0, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_12(src, 4, 32769, 4, dst, 4, 4, 4, kFilterNone));
EXPECT_EQ(-1, ScalePlane_12(src, 4, 4, -32769, dst, 4, 4, 4, kFilterNone));
}
} // namespace libyuv

View File

@ -128,7 +128,7 @@ static int RGBTestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -163,14 +163,14 @@ TEST_FACTOR(3, 1, 3)
int diff = RGBTestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
int diff = RGBTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -202,7 +202,7 @@ TEST_SCALETO(RGBScale, 1920, 1080)
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -233,14 +233,14 @@ TEST_F(LibYUVScaleTest, RGBTest3x) {
kFilterBilinear);
}
ASSERT_EQ(225, dest_pixels[0]);
ASSERT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
RGBScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
ASSERT_EQ(225, dest_pixels[0]);
ASSERT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -264,14 +264,14 @@ TEST_F(LibYUVScaleTest, RGBTest4x) {
kFilterBilinear);
}
ASSERT_EQ(66, dest_pixels[0]);
ASSERT_EQ(190, dest_pixels[1]);
EXPECT_EQ(66, dest_pixels[0]);
EXPECT_EQ(190, dest_pixels[1]);
RGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
kFilterNone);
ASSERT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
ASSERT_EQ(255 - 2, dest_pixels[1]);
EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(255 - 2, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);

View File

@ -757,7 +757,7 @@ static int NV12TestFilter(int src_width,
int src_height_uv = (Abs(src_height) + 1) >> 1;
int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv) * 2;
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv)*2;
int src_stride_y = Abs(src_width);
int src_stride_uv = src_width_uv * 2;
@ -775,7 +775,7 @@ static int NV12TestFilter(int src_width,
int dst_height_uv = (dst_height + 1) >> 1;
int64_t dst_y_plane_size = (dst_width) * (dst_height);
int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv) * 2;
int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv)*2;
int dst_stride_y = dst_width;
int dst_stride_uv = dst_width_uv * 2;
@ -856,7 +856,7 @@ static int NV12TestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
int diff = I444TestFilter( \
@ -864,7 +864,7 @@ static int NV12TestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_12) { \
int diff = I420TestFilter_12( \
@ -872,7 +872,7 @@ static int NV12TestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_12) { \
int diff = I444TestFilter_12( \
@ -880,7 +880,7 @@ static int NV12TestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, NV12ScaleDownBy##name##_##filter) { \
int diff = NV12TestFilter( \
@ -888,7 +888,7 @@ static int NV12TestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
@ -931,61 +931,61 @@ TEST_FACTOR(3, 1, 3, 0)
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I420##name##To##width##x##height##_##filter##_12) { \
int diff = I420TestFilter_12( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I444##name##To##width##x##height##_##filter##_12) { \
int diff = I444TestFilter_12( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
int diff = I420TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
int diff = I444TestFilter_16( \
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, NV12##name##To##width##x##height##_##filter) { \
int diff = NV12TestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I420##name##From##width##x##height##_##filter##_12) { \
@ -993,7 +993,7 @@ TEST_FACTOR(3, 1, 3, 0)
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I444##name##From##width##x##height##_##filter##_12) { \
@ -1001,7 +1001,7 @@ TEST_FACTOR(3, 1, 3, 0)
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
@ -1009,7 +1009,7 @@ TEST_FACTOR(3, 1, 3, 0)
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, \
DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
@ -1017,14 +1017,14 @@ TEST_FACTOR(3, 1, 3, 0)
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, NV12##name##From##width##x##height##_##filter) { \
int diff = NV12TestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#ifndef DISABLE_SLOW_TESTS
@ -1068,49 +1068,49 @@ TEST_SCALETO(Scale, 1080, 1920) // for rotated phones
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, I444##name##SwapXY_##filter) { \
int diff = I444TestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_12) { \
int diff = I420TestFilter_12(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_12) { \
int diff = I444TestFilter_12(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I420##name##SwapXY_##filter##_16) { \
int diff = I420TestFilter_16(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, DISABLED_##I444##name##SwapXY_##filter##_16) { \
int diff = I444TestFilter_16(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, NV12##name##SwapXY_##filter) { \
int diff = NV12TestFilter(benchmark_width_, benchmark_height_, \
benchmark_height_, benchmark_width_, \
kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
// Test scale to a specified size with all 4 filters.

View File

@ -101,7 +101,7 @@ static int UVTestFilter(int src_width,
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_EQ(0, diff); \
EXPECT_EQ(0, diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -132,14 +132,14 @@ TEST_FACTOR(3, 1, 3)
int diff = UVTestFilter(benchmark_width_, benchmark_height_, width, \
height, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
} \
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
int diff = UVTestFilter(width, height, Abs(benchmark_width_), \
Abs(benchmark_height_), kFilter##filter, \
benchmark_iterations_, disable_cpu_flags_, \
benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -171,7 +171,7 @@ TEST_SCALETO(UVScale, 1920, 1080)
UVTestFilter(benchmark_width_, benchmark_height_, benchmark_height_, \
benchmark_width_, kFilter##filter, benchmark_iterations_, \
disable_cpu_flags_, benchmark_cpu_info_); \
ASSERT_LE(diff, max_diff); \
EXPECT_LE(diff, max_diff); \
}
#if defined(ENABLE_FULL_TESTS)
@ -202,14 +202,14 @@ TEST_F(LibYUVScaleTest, UVTest3x) {
kFilterBilinear);
}
ASSERT_EQ(225, dest_pixels[0]);
ASSERT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
UVScale(orig_pixels, kSrcStride, 480, 3, dest_pixels, kDstStride, 160, 1,
kFilterNone);
ASSERT_EQ(225, dest_pixels[0]);
ASSERT_EQ(255 - 225, dest_pixels[1]);
EXPECT_EQ(225, dest_pixels[0]);
EXPECT_EQ(255 - 225, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);
@ -233,14 +233,14 @@ TEST_F(LibYUVScaleTest, UVTest4x) {
kFilterBilinear);
}
ASSERT_EQ(66, dest_pixels[0]);
ASSERT_EQ(190, dest_pixels[1]);
EXPECT_EQ(66, dest_pixels[0]);
EXPECT_EQ(190, dest_pixels[1]);
UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
kFilterNone);
ASSERT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
ASSERT_EQ(255 - 2, dest_pixels[1]);
EXPECT_EQ(2, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
EXPECT_EQ(255 - 2, dest_pixels[1]);
free_aligned_buffer_page_end(dest_pixels);
free_aligned_buffer_page_end(orig_pixels);

View File

@ -169,6 +169,9 @@ static int TestCpuEnv(int cpu_info) {
if (TestEnv("LIBYUV_DISABLE_AMXINT8")) {
cpu_info &= ~libyuv::kCpuHasAMXINT8;
}
if (TestEnv("LIBYUV_DISABLE_AVX512BMM")) {
cpu_info &= ~libyuv::kCpuHasAVX512BMM;
}
#endif
if (TestEnv("LIBYUV_DISABLE_ASM")) {
cpu_info = libyuv::kCpuInitialized;

View File

@ -85,11 +85,10 @@ static inline bool SizeValid(int src_width,
#define align_buffer_page_end_16(var, size) \
uint16_t* var = NULL; \
uint8_t* var##_mem = \
reinterpret_cast<uint8_t*>(malloc(((size) * 2 + 4095 + 63) & ~4095)); \
reinterpret_cast<uint8_t*>(malloc(((size)*2 + 4095 + 63) & ~4095)); \
if (var##_mem) \
var = reinterpret_cast<uint16_t*>( \
(intptr_t)(var##_mem + (((size) * 2 + 4095 + 63) & ~4095) - \
(size) * 2) & \
(intptr_t)(var##_mem + (((size)*2 + 4095 + 63) & ~4095) - (size)*2) & \
~63)
#define free_aligned_buffer_page_end_16(var) \

View File

@ -36,77 +36,77 @@ static bool TestValidFourCC(uint32_t fourcc, int bpp) {
}
TEST_F(LibYUVBaseTest, TestCanonicalFourCC) {
ASSERT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565));
ASSERT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_IYUV));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_I420), CanonicalFourCC(FOURCC_YU12));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_I422), CanonicalFourCC(FOURCC_YU16));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_I444), CanonicalFourCC(FOURCC_YU24));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUYV));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_YUY2), CanonicalFourCC(FOURCC_YUVS));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_HDYC));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_UYVY), CanonicalFourCC(FOURCC_2VUY));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_JPEG));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_MJPG), CanonicalFourCC(FOURCC_DMB1));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_RGB3));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_24BG), CanonicalFourCC(FOURCC_BGR3));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_BGRA), CanonicalFourCC(FOURCC_CM32));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_RAW), CanonicalFourCC(FOURCC_CM24));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_L555));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBP), CanonicalFourCC(FOURCC_L565));
EXPECT_EQ(static_cast<uint32_t>(FOURCC_RGBO), CanonicalFourCC(FOURCC_5551));
}
TEST_F(LibYUVBaseTest, TestFourCC) {
ASSERT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420));
ASSERT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420));
ASSERT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422));
ASSERT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444));
ASSERT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400));
ASSERT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21));
ASSERT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12));
ASSERT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2));
ASSERT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY));
ASSERT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); // deprecated.
ASSERT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated.
ASSERT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB));
ASSERT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA));
ASSERT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
ASSERT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30));
ASSERT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30));
ASSERT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64));
ASSERT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64));
ASSERT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
ASSERT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
ASSERT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));
ASSERT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP));
ASSERT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO));
ASSERT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444));
ASSERT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420));
ASSERT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422));
ASSERT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010));
ASSERT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210));
ASSERT_TRUE(TestValidFourCC(FOURCC_I010, FOURCC_BPP_I010));
ASSERT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210));
ASSERT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010));
ASSERT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210));
ASSERT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG));
ASSERT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12));
ASSERT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16));
ASSERT_TRUE(TestValidFourCC(FOURCC_YV24, FOURCC_BPP_YV24));
ASSERT_TRUE(TestValidFourCC(FOURCC_YU12, FOURCC_BPP_YU12));
ASSERT_TRUE(TestValidFourCC(FOURCC_IYUV, FOURCC_BPP_IYUV));
ASSERT_TRUE(TestValidFourCC(FOURCC_YU16, FOURCC_BPP_YU16));
ASSERT_TRUE(TestValidFourCC(FOURCC_YU24, FOURCC_BPP_YU24));
ASSERT_TRUE(TestValidFourCC(FOURCC_YUYV, FOURCC_BPP_YUYV));
ASSERT_TRUE(TestValidFourCC(FOURCC_YUVS, FOURCC_BPP_YUVS));
ASSERT_TRUE(TestValidFourCC(FOURCC_HDYC, FOURCC_BPP_HDYC));
ASSERT_TRUE(TestValidFourCC(FOURCC_2VUY, FOURCC_BPP_2VUY));
ASSERT_TRUE(TestValidFourCC(FOURCC_JPEG, FOURCC_BPP_JPEG));
ASSERT_TRUE(TestValidFourCC(FOURCC_DMB1, FOURCC_BPP_DMB1));
ASSERT_TRUE(TestValidFourCC(FOURCC_BA81, FOURCC_BPP_BA81));
ASSERT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3));
ASSERT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3));
ASSERT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264));
ASSERT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY));
EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420));
EXPECT_TRUE(TestValidFourCC(FOURCC_I420, FOURCC_BPP_I420));
EXPECT_TRUE(TestValidFourCC(FOURCC_I422, FOURCC_BPP_I422));
EXPECT_TRUE(TestValidFourCC(FOURCC_I444, FOURCC_BPP_I444));
EXPECT_TRUE(TestValidFourCC(FOURCC_I400, FOURCC_BPP_I400));
EXPECT_TRUE(TestValidFourCC(FOURCC_NV21, FOURCC_BPP_NV21));
EXPECT_TRUE(TestValidFourCC(FOURCC_NV12, FOURCC_BPP_NV12));
EXPECT_TRUE(TestValidFourCC(FOURCC_YUY2, FOURCC_BPP_YUY2));
EXPECT_TRUE(TestValidFourCC(FOURCC_UYVY, FOURCC_BPP_UYVY));
EXPECT_TRUE(TestValidFourCC(FOURCC_M420, FOURCC_BPP_M420)); // deprecated.
EXPECT_TRUE(TestValidFourCC(FOURCC_Q420, FOURCC_BPP_Q420)); // deprecated.
EXPECT_TRUE(TestValidFourCC(FOURCC_ARGB, FOURCC_BPP_ARGB));
EXPECT_TRUE(TestValidFourCC(FOURCC_BGRA, FOURCC_BPP_BGRA));
EXPECT_TRUE(TestValidFourCC(FOURCC_ABGR, FOURCC_BPP_ABGR));
EXPECT_TRUE(TestValidFourCC(FOURCC_AR30, FOURCC_BPP_AR30));
EXPECT_TRUE(TestValidFourCC(FOURCC_AB30, FOURCC_BPP_AB30));
EXPECT_TRUE(TestValidFourCC(FOURCC_AR64, FOURCC_BPP_AR64));
EXPECT_TRUE(TestValidFourCC(FOURCC_AB64, FOURCC_BPP_AB64));
EXPECT_TRUE(TestValidFourCC(FOURCC_24BG, FOURCC_BPP_24BG));
EXPECT_TRUE(TestValidFourCC(FOURCC_RAW, FOURCC_BPP_RAW));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBA, FOURCC_BPP_RGBA));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBP, FOURCC_BPP_RGBP));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGBO, FOURCC_BPP_RGBO));
EXPECT_TRUE(TestValidFourCC(FOURCC_R444, FOURCC_BPP_R444));
EXPECT_TRUE(TestValidFourCC(FOURCC_H420, FOURCC_BPP_H420));
EXPECT_TRUE(TestValidFourCC(FOURCC_H422, FOURCC_BPP_H422));
EXPECT_TRUE(TestValidFourCC(FOURCC_H010, FOURCC_BPP_H010));
EXPECT_TRUE(TestValidFourCC(FOURCC_H210, FOURCC_BPP_H210));
EXPECT_TRUE(TestValidFourCC(FOURCC_I010, FOURCC_BPP_I010));
EXPECT_TRUE(TestValidFourCC(FOURCC_I210, FOURCC_BPP_I210));
EXPECT_TRUE(TestValidFourCC(FOURCC_P010, FOURCC_BPP_P010));
EXPECT_TRUE(TestValidFourCC(FOURCC_P210, FOURCC_BPP_P210));
EXPECT_TRUE(TestValidFourCC(FOURCC_MJPG, FOURCC_BPP_MJPG));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV12, FOURCC_BPP_YV12));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV16, FOURCC_BPP_YV16));
EXPECT_TRUE(TestValidFourCC(FOURCC_YV24, FOURCC_BPP_YV24));
EXPECT_TRUE(TestValidFourCC(FOURCC_YU12, FOURCC_BPP_YU12));
EXPECT_TRUE(TestValidFourCC(FOURCC_IYUV, FOURCC_BPP_IYUV));
EXPECT_TRUE(TestValidFourCC(FOURCC_YU16, FOURCC_BPP_YU16));
EXPECT_TRUE(TestValidFourCC(FOURCC_YU24, FOURCC_BPP_YU24));
EXPECT_TRUE(TestValidFourCC(FOURCC_YUYV, FOURCC_BPP_YUYV));
EXPECT_TRUE(TestValidFourCC(FOURCC_YUVS, FOURCC_BPP_YUVS));
EXPECT_TRUE(TestValidFourCC(FOURCC_HDYC, FOURCC_BPP_HDYC));
EXPECT_TRUE(TestValidFourCC(FOURCC_2VUY, FOURCC_BPP_2VUY));
EXPECT_TRUE(TestValidFourCC(FOURCC_JPEG, FOURCC_BPP_JPEG));
EXPECT_TRUE(TestValidFourCC(FOURCC_DMB1, FOURCC_BPP_DMB1));
EXPECT_TRUE(TestValidFourCC(FOURCC_BA81, FOURCC_BPP_BA81));
EXPECT_TRUE(TestValidFourCC(FOURCC_RGB3, FOURCC_BPP_RGB3));
EXPECT_TRUE(TestValidFourCC(FOURCC_BGR3, FOURCC_BPP_BGR3));
EXPECT_TRUE(TestValidFourCC(FOURCC_H264, FOURCC_BPP_H264));
EXPECT_TRUE(TestValidFourCC(FOURCC_ANY, FOURCC_BPP_ANY));
}
} // namespace libyuv

View File

@ -15,6 +15,8 @@
#ifdef __linux__
#include <ctype.h>
#include <sys/utsname.h>
#include <signal.h>
#include <setjmp.h>
#endif
#include "libyuv/cpu_id.h"
@ -40,6 +42,14 @@ static void KernelVersion(int* version) {
}
#endif
#ifdef __linux__
// Jump buffer filled by sigsetjmp() in main() before the VDPPHPS probe runs;
// the SIGILL handler below jumps back through it.
static sigjmp_buf vdpphps_jmpbuf;
// SIGILL handler used while probing VDPPHPS. Rather than returning to the
// faulting instruction, it unwinds to the saved sigsetjmp() point, which then
// observes a return value of 1.
static void vdpphps_sigill_handler(int sig) {
(void)sig;
siglongjmp(vdpphps_jmpbuf, 1);
}
#endif
int main(int argc, const char* argv[]) {
(void)argc;
(void)argv;
@ -182,6 +192,7 @@ int main(int argc, const char* argv[]) {
int has_avxvnni = TestCpuFlag(kCpuHasAVXVNNI);
int has_avxvnniint8 = TestCpuFlag(kCpuHasAVXVNNIINT8);
int has_amxint8 = TestCpuFlag(kCpuHasAMXINT8);
int has_avx512bmm = TestCpuFlag(kCpuHasAVX512BMM);
printf("Has X86 0x%x\n", has_x86);
printf("Has SSE2 0x%x\n", has_sse2);
printf("Has SSSE3 0x%x\n", has_ssse3);
@ -204,6 +215,30 @@ int main(int argc, const char* argv[]) {
printf("HAS AVXVNNI 0x%x\n", has_avxvnni);
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
printf("Has AMXINT8 0x%x\n", has_amxint8);
printf("Has AVX512BMM 0x%x\n", has_avx512bmm);
#ifdef __linux__
  // Probe whether the VDPPHPS instruction actually executes on this CPU.
  // A temporary SIGILL handler turns an illegal-instruction trap into a
  // siglongjmp() back to the sigsetjmp() below, so an unsupported CPU is
  // reported instead of terminating the process.
  // NOTE(review): the mnemonic must be known to the assembler; toolchains
  // that predate VDPPHPS support will presumably fail to build this - confirm
  // the minimum supported toolchain.
  {
    struct sigaction act, oldact;
    memset(&act, 0, sizeof(act));
    act.sa_handler = vdpphps_sigill_handler;
    // Initialize the mask portably rather than relying on all-zero bytes.
    sigemptyset(&act.sa_mask);
    if (sigaction(SIGILL, &act, &oldact) != 0) {
      // Without the handler installed, an unsupported instruction would kill
      // the program, so skip the probe instead of risking it.
      printf("Testing VDPPHPS instruction... Skipped (sigaction failed)\n");
    } else {
      printf("Testing VDPPHPS instruction... ");
      fflush(stdout);
      // savemask=1 so the signal mask is restored when the handler jumps back.
      if (sigsetjmp(vdpphps_jmpbuf, 1) == 0) {
        // VDPPHPS xmm0, xmm0, xmm0
        __asm__ volatile("vdpphps %%xmm0, %%xmm0, %%xmm0" : : : "xmm0");
        printf("Works!\n");
      } else {
        printf("Crashed (SIGILL)!\n");
      }
      // Restore whatever SIGILL disposition was in place before the probe.
      sigaction(SIGILL, &oldact, NULL);
    }
  }
#endif
}
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
// defined(_M_X64)

View File

@ -244,23 +244,23 @@ double GetSSIMFullKernel(const uint8_t* org,
// Read 8 pixels at line #L, and convert to 16bit, perform weighting
// and accumulate.
#define LOAD_LINE_PAIR(L, WEIGHT) \
do { \
const __m128i v0 = \
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L) * stride)); \
const __m128i v1 = \
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L) * stride)); \
const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \
const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \
const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \
const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \
x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \
y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \
x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \
y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \
xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \
xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \
yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \
#define LOAD_LINE_PAIR(L, WEIGHT) \
do { \
const __m128i v0 = \
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(org + (L)*stride)); \
const __m128i v1 = \
_mm_loadl_epi64(reinterpret_cast<const __m128i*>(rec + (L)*stride)); \
const __m128i w0 = _mm_unpacklo_epi8(v0, zero); \
const __m128i w1 = _mm_unpacklo_epi8(v1, zero); \
const __m128i ww0 = _mm_mullo_epi16(w0, (WEIGHT).values_.m_); \
const __m128i ww1 = _mm_mullo_epi16(w1, (WEIGHT).values_.m_); \
x = _mm_add_epi32(x, _mm_unpacklo_epi16(ww0, zero)); \
y = _mm_add_epi32(y, _mm_unpacklo_epi16(ww1, zero)); \
x = _mm_add_epi32(x, _mm_unpackhi_epi16(ww0, zero)); \
y = _mm_add_epi32(y, _mm_unpackhi_epi16(ww1, zero)); \
xx = _mm_add_epi32(xx, _mm_madd_epi16(ww0, w0)); \
xy = _mm_add_epi32(xy, _mm_madd_epi16(ww0, w1)); \
yy = _mm_add_epi32(yy, _mm_madd_epi16(ww1, w1)); \
} while (0)
#define ADD_AND_STORE_FOUR_EPI32(M, OUT) \