Apply clang format

Bug: None
Change-Id: I0d9db4b384144523e61ae32b6ab3f72e93a0c265
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6138934
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
Frank Barchard 2025-01-02 13:20:17 -08:00
parent b5a18f9d93
commit e0040eb318
33 changed files with 1808 additions and 1804 deletions

View File

@ -67,7 +67,6 @@ static const int kCpuHasLOONGARCH = 0x20;
static const int kCpuHasLSX = 0x100; static const int kCpuHasLSX = 0x100;
static const int kCpuHasLASX = 0x200; static const int kCpuHasLASX = 0x200;
// Optional init function. TestCpuFlag does an auto-init. // Optional init function. TestCpuFlag does an auto-init.
// Returns cpu_info flags. // Returns cpu_info flags.
LIBYUV_API LIBYUV_API

View File

@ -499,8 +499,8 @@ static inline void I422ToRGB565Row_SVE_SC(
// Calculate a predicate for the final iteration to deal with the tail. // Calculate a predicate for the final iteration to deal with the tail.
"cnth %[vl] \n" "cnth %[vl] \n"
"whilelt p1.b, wzr, %w[width] \n" // "whilelt p1.b, wzr, %w[width] \n" //
READYUV422_SVE_2X I422TORGB_SVE_2X READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
RGBTOARGB8_SVE_TOP_2X RGB8TORGB565_SVE_FROM_TOP_2X RGB8TORGB565_SVE_FROM_TOP_2X
"st2h {z18.h, z19.h}, p1, [%[dst]] \n" "st2h {z18.h, z19.h}, p1, [%[dst]] \n"
"99: \n" "99: \n"
@ -558,8 +558,8 @@ static inline void I422ToARGB1555Row_SVE_SC(
// Calculate a predicate for the final iteration to deal with the tail. // Calculate a predicate for the final iteration to deal with the tail.
"cnth %[vl] \n" "cnth %[vl] \n"
"whilelt p1.b, wzr, %w[width] \n" // "whilelt p1.b, wzr, %w[width] \n" //
READYUV422_SVE_2X I422TORGB_SVE_2X READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
RGBTOARGB8_SVE_TOP_2X RGB8TOARGB1555_SVE_FROM_TOP_2X RGB8TOARGB1555_SVE_FROM_TOP_2X
"st2h {z0.h, z1.h}, p1, [%[dst]] \n" "st2h {z0.h, z1.h}, p1, [%[dst]] \n"
"99: \n" "99: \n"
@ -617,8 +617,8 @@ static inline void I422ToARGB4444Row_SVE_SC(
// Calculate a predicate for the final iteration to deal with the tail. // Calculate a predicate for the final iteration to deal with the tail.
"cnth %[vl] \n" "cnth %[vl] \n"
"whilelt p1.b, wzr, %w[width] \n" // "whilelt p1.b, wzr, %w[width] \n" //
READYUV422_SVE_2X I422TORGB_SVE_2X READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
RGBTOARGB8_SVE_TOP_2X RGB8TOARGB4444_SVE_FROM_TOP_2X RGB8TOARGB4444_SVE_FROM_TOP_2X
"st2h {z0.h, z1.h}, p1, [%[dst]] \n" "st2h {z0.h, z1.h}, p1, [%[dst]] \n"
"99: \n" "99: \n"

View File

@ -70,9 +70,8 @@ int ConvertToARGB(const uint8_t* sample,
uint8_t* rotate_buffer = NULL; uint8_t* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (dst_argb == NULL || sample == NULL || if (dst_argb == NULL || sample == NULL || src_width <= 0 ||
src_width <= 0 || src_width > INT_MAX / 4 || src_width > INT_MAX / 4 || crop_width <= 0 || crop_width > INT_MAX / 4 ||
crop_width <= 0 || crop_width > INT_MAX / 4 ||
src_height == 0 || crop_height == 0) { src_height == 0 || crop_height == 0) {
return -1; return -1;
} }
@ -81,7 +80,8 @@ int ConvertToARGB(const uint8_t* sample,
} }
if (need_buf) { if (need_buf) {
const uint64_t rotate_buffer_size = (uint64_t)crop_width * 4 * abs_crop_height; const uint64_t rotate_buffer_size =
(uint64_t)crop_width * 4 * abs_crop_height;
if (rotate_buffer_size > SIZE_MAX) { if (rotate_buffer_size > SIZE_MAX) {
return -1; // Invalid size. return -1; // Invalid size.
} }

View File

@ -65,8 +65,9 @@ int ConvertToI420(const uint8_t* sample,
const int inv_crop_height = const int inv_crop_height =
(src_height < 0) ? -abs_crop_height : abs_crop_height; (src_height < 0) ? -abs_crop_height : abs_crop_height;
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 || src_width > INT_MAX / 4 || if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
crop_width <= 0 || src_height == 0 || crop_height == 0) { src_width > INT_MAX / 4 || crop_width <= 0 || src_height == 0 ||
crop_height == 0) {
return -1; return -1;
} }
@ -78,7 +79,8 @@ int ConvertToI420(const uint8_t* sample,
if (need_buf) { if (need_buf) {
int y_size = crop_width * abs_crop_height; int y_size = crop_width * abs_crop_height;
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
const uint64_t rotate_buffer_size = (uint64_t)y_size + (uint64_t)uv_size * 2; const uint64_t rotate_buffer_size =
(uint64_t)y_size + (uint64_t)uv_size * 2;
if (rotate_buffer_size > SIZE_MAX) { if (rotate_buffer_size > SIZE_MAX) {
return -1; // Invalid size. return -1; // Invalid size.
} }

View File

@ -191,7 +191,8 @@ static int ARGBRotate180(const uint8_t* src_argb,
#endif #endif
#if defined(HAS_COPYROW_AVX512BW) #if defined(HAS_COPYROW_AVX512BW)
if (TestCpuFlag(kCpuHasAVX512BW)) { if (TestCpuFlag(kCpuHasAVX512BW)) {
CopyRow = IS_ALIGNED(width * 4, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW; CopyRow =
IS_ALIGNED(width * 4, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
} }
#endif #endif
#if defined(HAS_COPYROW_ERMS) #if defined(HAS_COPYROW_ERMS)

View File

@ -475,8 +475,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
} }
void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
asm volatile ( asm volatile("movdqa %3,%%xmm6 \n"
"movdqa %3,%%xmm6 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -509,12 +508,12 @@ void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width) // %2
: "m"(kShuffleMaskARGBToRGB24) // %3 : "m"(kShuffleMaskARGBToRGB24) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
"xmm6");
} }
void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
asm volatile ( asm volatile("movdqa %3,%%xmm6 \n"
"movdqa %3,%%xmm6 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -547,7 +546,8 @@ void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
"+r"(dst), // %1 "+r"(dst), // %1
"+r"(width) // %2 "+r"(width) // %2
: "m"(kShuffleMaskARGBToRAW) // %3 : "m"(kShuffleMaskARGBToRAW) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
"xmm6");
} }
#ifdef HAS_ARGBTORGB24ROW_AVX2 #ifdef HAS_ARGBTORGB24ROW_AVX2
@ -1159,8 +1159,7 @@ void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64, void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
uint8_t* dst_argb, uint8_t* dst_argb,
int width) { int width) {
asm volatile ( asm volatile("movdqa %3,%%xmm2 \n"
"movdqa %3,%%xmm2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -1265,8 +1264,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64, void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
uint8_t* dst_argb, uint8_t* dst_argb,
int width) { int width) {
asm volatile ( asm volatile("vbroadcastf128 %3,%%ymm2 \n" LABELALIGN
"vbroadcastf128 %3,%%ymm2 \n" LABELALIGN
"1: \n" "1: \n"
"vmovdqu (%0),%%ymm0 \n" "vmovdqu (%0),%%ymm0 \n"
"vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x20(%0),%%ymm1 \n"
@ -2657,7 +2655,8 @@ void OMITFP I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA444 LABELALIGN "1: \n" READYUVA444
@ -2983,7 +2982,8 @@ void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA210 LABELALIGN "1: \n" READYUVA210
@ -3015,7 +3015,8 @@ void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA410 LABELALIGN "1: \n" READYUVA410
@ -3081,7 +3082,8 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA422 LABELALIGN "1: \n" READYUVA422
@ -3109,7 +3111,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN "1: \n" READNV12 LABELALIGN "1: \n" READNV12
@ -3130,7 +3133,8 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN "1: \n" READNV21 LABELALIGN "1: \n" READNV21
@ -3196,7 +3200,8 @@ void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN "1: \n" READP210 LABELALIGN "1: \n" READP210
@ -3217,7 +3222,8 @@ void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP( asm volatile(
YUVTORGB_SETUP(
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN "1: \n" READP410 LABELALIGN "1: \n" READP410
@ -4051,7 +4057,8 @@ void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA210_AVX2 LABELALIGN "1: \n" READYUVA210_AVX2
@ -4086,7 +4093,8 @@ void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA410_AVX2 LABELALIGN "1: \n" READYUVA410_AVX2
@ -4161,7 +4169,8 @@ void OMITFP I444AlphaToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA444_AVX2 LABELALIGN "1: \n" READYUVA444_AVX2
@ -4195,7 +4204,8 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "sub %[u_buf],%[v_buf] \n" yuvconstants) "sub %[u_buf],%[v_buf] \n"
LABELALIGN "1: \n" READYUVA422_AVX2 LABELALIGN "1: \n" READYUVA422_AVX2
@ -4271,7 +4281,8 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN "1: \n" READNV12_AVX2 LABELALIGN "1: \n" READNV12_AVX2
@ -4297,7 +4308,8 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN "1: \n" READNV21_AVX2 LABELALIGN "1: \n" READNV21_AVX2
@ -4378,7 +4390,8 @@ void OMITFP P210ToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN "1: \n" READP210_AVX2 LABELALIGN "1: \n" READP210_AVX2
@ -4404,7 +4417,8 @@ void OMITFP P410ToARGBRow_AVX2(const uint16_t* y_buf,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP_AVX2( asm volatile(
YUVTORGB_SETUP_AVX2(
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN "1: \n" READP410_AVX2 LABELALIGN "1: \n" READP410_AVX2
@ -4590,8 +4604,7 @@ static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile("movdqa %3,%%xmm5 \n"
"movdqa %3,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -4612,8 +4625,7 @@ void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
#ifdef HAS_MIRRORROW_AVX2 #ifdef HAS_MIRRORROW_AVX2
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile("vbroadcastf128 %3,%%ymm5 \n"
"vbroadcastf128 %3,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -4640,8 +4652,7 @@ static const uvec8 kShuffleMirrorUV = {14u, 15u, 12u, 13u, 10u, 11u, 8u, 9u,
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) { void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile("movdqa %3,%%xmm5 \n"
"movdqa %3,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -4662,8 +4673,7 @@ void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
#ifdef HAS_MIRRORUVROW_AVX2 #ifdef HAS_MIRRORUVROW_AVX2
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) { void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile("vbroadcastf128 %3,%%ymm5 \n"
"vbroadcastf128 %3,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -4767,8 +4777,7 @@ void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile("lea -0x10(%0,%2,4),%0 \n"
"lea -0x10(%0,%2,4),%0 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -4792,8 +4801,7 @@ void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
intptr_t temp_width = (intptr_t)(width); intptr_t temp_width = (intptr_t)(width);
asm volatile ( asm volatile("vmovdqu %3,%%ymm5 \n"
"vmovdqu %3,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -5026,8 +5034,7 @@ void MergeUVRow_AVX512BW(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uv, uint8_t* dst_uv,
int width) { int width) {
asm volatile ( asm volatile("sub %0,%1 \n"
"sub %0,%1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -5055,8 +5062,7 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uv, uint8_t* dst_uv,
int width) { int width) {
asm volatile ( asm volatile("sub %0,%1 \n"
"sub %0,%1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -5084,8 +5090,7 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uv, uint8_t* dst_uv,
int width) { int width) {
asm volatile ( asm volatile("sub %0,%1 \n"
"sub %0,%1 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -5334,8 +5339,7 @@ void Convert16To8Row_AVX512BW(const uint16_t* src_y,
uint8_t* dst_y, uint8_t* dst_y,
int scale, int scale,
int width) { int width) {
asm volatile ( asm volatile("vpbroadcastw %3,%%zmm2 \n"
"vpbroadcastw %3,%%zmm2 \n"
// 64 pixels per loop. // 64 pixels per loop.
LABELALIGN LABELALIGN
@ -5514,8 +5518,11 @@ static const uvec8 kSplitRGBShuffleSSE41[5] = {
{0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u}, {0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u},
}; };
void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r, void SplitRGBRow_SSE41(const uint8_t* src_rgb,
uint8_t* dst_g, uint8_t* dst_b, int width) { uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width) {
asm volatile( asm volatile(
"movdqa 48(%5), %%xmm0 \n" "movdqa 48(%5), %%xmm0 \n"
"1: \n" "1: \n"
@ -5554,8 +5561,11 @@ void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
#endif // HAS_SPLITRGBROW_SSE41 #endif // HAS_SPLITRGBROW_SSE41
#ifdef HAS_SPLITRGBROW_AVX2 #ifdef HAS_SPLITRGBROW_AVX2
void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r, void SplitRGBRow_AVX2(const uint8_t* src_rgb,
uint8_t* dst_g, uint8_t* dst_b, int width) { uint8_t* dst_r,
uint8_t* dst_g,
uint8_t* dst_b,
int width) {
asm volatile( asm volatile(
"vbroadcasti128 48(%5), %%ymm0 \n" "vbroadcasti128 48(%5), %%ymm0 \n"
"vbroadcasti128 64(%5), %%ymm7 \n" "vbroadcasti128 64(%5), %%ymm7 \n"
@ -5607,7 +5617,8 @@ void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
"xmm7" "xmm7"
#if defined(__x86_64__) #if defined(__x86_64__)
, "xmm8", "xmm9", "xmm10" ,
"xmm8", "xmm9", "xmm10"
#endif #endif
); );
} }
@ -6530,8 +6541,7 @@ void CopyRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width) {
// Multiple of 1. // Multiple of 1.
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) { void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
size_t width_tmp = (size_t)(width); size_t width_tmp = (size_t)(width);
asm volatile ( asm volatile("rep movsb \n"
"rep movsb \n"
: "+S"(src), // %0 : "+S"(src), // %0
"+D"(dst), // %1 "+D"(dst), // %1
"+c"(width_tmp) // %2 "+c"(width_tmp) // %2
@ -6741,8 +6751,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
size_t width_tmp = (size_t)(width >> 2); size_t width_tmp = (size_t)(width >> 2);
const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
asm volatile ( asm volatile("rep stosl \n"
"rep stosl \n"
: "+D"(dst), // %0 : "+D"(dst), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
: "a"(v32) // %2 : "a"(v32) // %2
@ -6751,8 +6760,7 @@ void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
size_t width_tmp = (size_t)(width); size_t width_tmp = (size_t)(width);
asm volatile ( asm volatile("rep stosb \n"
"rep stosb \n"
: "+D"(dst), // %0 : "+D"(dst), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
: "a"(v8) // %2 : "a"(v8) // %2
@ -6761,8 +6769,7 @@ void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) { void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) {
size_t width_tmp = (size_t)(width); size_t width_tmp = (size_t)(width);
asm volatile ( asm volatile("rep stosl \n"
"rep stosl \n"
: "+D"(dst_argb), // %0 : "+D"(dst_argb), // %0
"+c"(width_tmp) // %1 "+c"(width_tmp) // %1
: "a"(v32) // %2 : "a"(v32) // %2
@ -7923,8 +7930,7 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width) { int width) {
asm volatile ( asm volatile("pxor %%xmm5,%%xmm5 \n"
"pxor %%xmm5,%%xmm5 \n"
// 4 pixel loop. // 4 pixel loop.
LABELALIGN LABELALIGN
@ -7961,8 +7967,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
const uint8_t* src_argb1, const uint8_t* src_argb1,
uint8_t* dst_argb, uint8_t* dst_argb,
int width) { int width) {
asm volatile ( asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
// 4 pixel loop. // 4 pixel loop.
LABELALIGN LABELALIGN
@ -8809,8 +8814,7 @@ void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const uint8_t* shuffler, const uint8_t* shuffler,
int width) { int width) {
asm volatile ( asm volatile("movdqu (%3),%%xmm5 \n"
"movdqu (%3),%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -8838,8 +8842,7 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const uint8_t* shuffler, const uint8_t* shuffler,
int width) { int width) {
asm volatile ( asm volatile("vbroadcastf128 (%3),%%ymm5 \n"
"vbroadcastf128 (%3),%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -8868,8 +8871,7 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile ( asm volatile("sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -8903,8 +8905,7 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uyvy, uint8_t* dst_uyvy,
int width) { int width) {
asm volatile ( asm volatile("sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -8938,8 +8939,7 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_yuy2, uint8_t* dst_yuy2,
int width) { int width) {
asm volatile ( asm volatile("sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -8976,8 +8976,7 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y,
const uint8_t* src_v, const uint8_t* src_v,
uint8_t* dst_uyvy, uint8_t* dst_uyvy,
int width) { int width) {
asm volatile ( asm volatile("sub %1,%2 \n"
"sub %1,%2 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -9013,8 +9012,7 @@ void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
uint8_t* dst_argb, uint8_t* dst_argb,
const float* poly, const float* poly,
int width) { int width) {
asm volatile ( asm volatile("pxor %%xmm3,%%xmm3 \n"
"pxor %%xmm3,%%xmm3 \n"
// 2 pixel loop. // 2 pixel loop.
LABELALIGN LABELALIGN
@ -9059,7 +9057,8 @@ void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
"+r"(dst_argb), // %1 "+r"(dst_argb), // %1
"+r"(width) // %2 "+r"(width) // %2
: "r"(poly) // %3 : "r"(poly) // %3
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
"xmm6");
} }
#endif // HAS_ARGBPOLYNOMIALROW_SSE2 #endif // HAS_ARGBPOLYNOMIALROW_SSE2
@ -9551,8 +9550,7 @@ static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u,
// Convert UV plane of NV12 to VU of NV21. // Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) { void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile ( asm volatile("movdqu %3,%%xmm5 \n"
"movdqu %3,%%xmm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"
@ -9576,8 +9574,7 @@ void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
#ifdef HAS_SWAPUVROW_AVX2 #ifdef HAS_SWAPUVROW_AVX2
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) { void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
asm volatile ( asm volatile("vbroadcastf128 %3,%%ymm5 \n"
"vbroadcastf128 %3,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"

View File

@ -290,7 +290,8 @@ void I210ToAR30Row_NEON(const uint16_t* src_y,
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
uint16_t limit = 0x3ff0; uint16_t limit = 0x3ff0;
uint16_t alpha = 0xc000; uint16_t alpha = 0xc000;
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"dup v22.8h, %w[limit] \n" "dup v22.8h, %w[limit] \n"
"dup v23.8h, %w[alpha] \n" "dup v23.8h, %w[alpha] \n"
"1: \n" READYUV210 NVTORGB "1: \n" READYUV210 NVTORGB
@ -318,7 +319,8 @@ void I410ToAR30Row_NEON(const uint16_t* src_y,
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias; const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
uint16_t limit = 0x3ff0; uint16_t limit = 0x3ff0;
uint16_t alpha = 0xc000; uint16_t alpha = 0xc000;
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"dup v22.8h, %w[limit] \n" "dup v22.8h, %w[limit] \n"
"dup v23.8h, %w[alpha] \n" "dup v23.8h, %w[alpha] \n"
"1: \n" READYUV410 NVTORGB "1: \n" READYUV410 NVTORGB
@ -369,7 +371,8 @@ void I210ToARGBRow_NEON(const uint16_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"movi v19.8b, #255 \n" "movi v19.8b, #255 \n"
"1: \n" READYUV210 NVTORGB RGBTORGB8 "1: \n" READYUV210 NVTORGB RGBTORGB8
"subs %w[width], %w[width], #8 \n" "subs %w[width], %w[width], #8 \n"
@ -391,7 +394,8 @@ void I410ToARGBRow_NEON(const uint16_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"movi v19.8b, #255 \n" "movi v19.8b, #255 \n"
"1: \n" READYUV410 NVTORGB RGBTORGB8 "1: \n" READYUV410 NVTORGB RGBTORGB8
"subs %w[width], %w[width], #8 \n" "subs %w[width], %w[width], #8 \n"
@ -626,7 +630,8 @@ void I410AlphaToARGBRow_NEON(const uint16_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"1: \n" "1: \n"
"ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV410 "ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV410
"uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8 "uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8
@ -651,7 +656,8 @@ void I210AlphaToARGBRow_NEON(const uint16_t* src_y,
uint8_t* dst_argb, uint8_t* dst_argb,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"1: \n" "1: \n"
"ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV210 "ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV210
"uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8 "uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8
@ -807,7 +813,8 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
uint8_t* dst_argb1555, uint8_t* dst_argb1555,
const struct YuvConstants* yuvconstants, const struct YuvConstants* yuvconstants,
int width) { int width) {
asm volatile (YUVTORGB_SETUP asm volatile(
YUVTORGB_SETUP
"movi v19.8h, #0x80, lsl #8 \n" "movi v19.8h, #0x80, lsl #8 \n"
"1: \n" // "1: \n" //
READYUV422 I4XXTORGB RGBTORGB8_TOP READYUV422 I4XXTORGB RGBTORGB8_TOP

View File

@ -10,8 +10,8 @@
#include "libyuv/scale.h" #include "libyuv/scale.h"
#include <limits.h>
#include <assert.h> #include <assert.h>
#include <limits.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -1233,10 +1233,9 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
(void)src_fourcc; // TODO(fbarchard): implement and/or assert. (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
(void)dst_fourcc; (void)dst_fourcc;
const int abs_src_height = (src_height < 0) ? -src_height : src_height; const int abs_src_height = (src_height < 0) ? -src_height : src_height;
if (!src_y || !src_u || !src_v || !dst_argb || if (!src_y || !src_u || !src_v || !dst_argb || src_width <= 0 ||
src_width <= 0 || src_width > INT_MAX / 4 || src_height == 0 || src_width > INT_MAX / 4 || src_height == 0 || dst_width <= 0 ||
dst_width <= 0 || dst_height <= 0 || dst_height <= 0 || clip_width <= 0 || clip_height <= 0) {
clip_width <= 0 || clip_height <= 0) {
return -1; return -1;
} }
const uint64_t argb_buffer_size = (uint64_t)src_width * abs_src_height * 4; const uint64_t argb_buffer_size = (uint64_t)src_width * abs_src_height * 4;
@ -1250,9 +1249,9 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
argb_buffer, src_width * 4, src_width, src_height); argb_buffer, src_width * 4, src_width, src_height);
r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, abs_src_height, dst_argb, r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, abs_src_height,
dst_stride_argb, dst_width, dst_height, clip_x, clip_y, dst_argb, dst_stride_argb, dst_width, dst_height, clip_x,
clip_width, clip_height, filtering); clip_y, clip_width, clip_height, filtering);
free(argb_buffer); free(argb_buffer);
return r; return r;
} }

View File

@ -1761,8 +1761,7 @@ void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
void ScaleAddRow_SSE2(const uint8_t* src_ptr, void ScaleAddRow_SSE2(const uint8_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int src_width) { int src_width) {
asm volatile ( asm volatile("pxor %%xmm5,%%xmm5 \n"
"pxor %%xmm5,%%xmm5 \n"
// 16 pixel loop. // 16 pixel loop.
LABELALIGN LABELALIGN
@ -1793,8 +1792,7 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr,
void ScaleAddRow_AVX2(const uint8_t* src_ptr, void ScaleAddRow_AVX2(const uint8_t* src_ptr,
uint16_t* dst_ptr, uint16_t* dst_ptr,
int src_width) { int src_width) {
asm volatile ( asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN LABELALIGN
"1: \n" "1: \n"

View File

@ -1178,12 +1178,8 @@ void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
"stp w10, w11, [%[dst]], #8 \n" "stp w10, w11, [%[dst]], #8 \n"
"stp w12, w13, [%[dst]], #8 \n" "stp w12, w13, [%[dst]], #8 \n"
"b.gt 1b \n" "b.gt 1b \n"
: [src]"+r"(src_argb), : [src] "+r"(src_argb), [src1] "+r"(src_argb1), [src2] "+r"(src_argb2),
[src1]"+r"(src_argb1), [src3] "+r"(src_argb3), [dst] "+r"(dst_argb), [width] "+r"(dst_width),
[src2]"+r"(src_argb2),
[src3]"+r"(src_argb3),
[dst]"+r"(dst_argb),
[width]"+r"(dst_width),
[i] "+r"(i) [i] "+r"(i)
: [step] "r"((int64_t)(src_stepx * 16)) : [step] "r"((int64_t)(src_stepx * 16))
: "memory", "cc", "w10", "w11", "w12", "w13"); : "memory", "cc", "w10", "w11", "w12", "w13");

View File

@ -10,8 +10,8 @@
#include "libyuv/scale.h" /* For FilterMode */ #include "libyuv/scale.h" /* For FilterMode */
#include <limits.h>
#include <assert.h> #include <assert.h>
#include <limits.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@ -41,9 +41,9 @@ int RGBScale(const uint8_t* src_rgb,
int dst_height, int dst_height,
enum FilterMode filtering) { enum FilterMode filtering) {
int r; int r;
if (!src_rgb || !dst_rgb || if (!src_rgb || !dst_rgb || src_width <= 0 || src_width > INT_MAX / 4 ||
src_width <= 0 || src_width > INT_MAX / 4 || src_height == 0 || src_height == 0 || dst_width <= 0 || dst_width > INT_MAX / 4 ||
dst_width <= 0 || dst_width > INT_MAX / 4 || dst_height <= 0) { dst_height <= 0) {
return -1; return -1;
} }
const int abs_src_height = (src_height < 0) ? -src_height : src_height; const int abs_src_height = (src_height < 0) ? -src_height : src_height;

View File

@ -15,7 +15,6 @@ namespace libyuv {
extern "C" { extern "C" {
#endif #endif
#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \ #if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \
defined(__aarch64__) defined(__aarch64__)

View File

@ -12,6 +12,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
#include "../unit_test/unit_test.h"
#include "libyuv/basic_types.h" #include "libyuv/basic_types.h"
#include "libyuv/compare.h" #include "libyuv/compare.h"
#include "libyuv/convert.h" #include "libyuv/convert.h"
@ -19,7 +20,6 @@
#include "libyuv/convert_from.h" #include "libyuv/convert_from.h"
#include "libyuv/convert_from_argb.h" #include "libyuv/convert_from_argb.h"
#include "libyuv/cpu_id.h" #include "libyuv/cpu_id.h"
#include "../unit_test/unit_test.h"
#include "libyuv/planar_functions.h" #include "libyuv/planar_functions.h"
#include "libyuv/rotate.h" #include "libyuv/rotate.h"
#include "libyuv/video_common.h" #include "libyuv/video_common.h"

View File

@ -169,8 +169,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
} }
#endif // defined(__loongarch__) #endif // defined(__loongarch__)
#if defined(__i386__) || defined(__x86_64__) || \ #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
defined(_M_IX86) || defined(_M_X64) defined(_M_X64)
int has_x86 = TestCpuFlag(kCpuHasX86); int has_x86 = TestCpuFlag(kCpuHasX86);
if (has_x86) { if (has_x86) {
int has_sse2 = TestCpuFlag(kCpuHasSSE2); int has_sse2 = TestCpuFlag(kCpuHasSSE2);
@ -215,7 +215,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8); printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
printf("Has AMXINT8 0x%x\n", has_amxint8); printf("Has AMXINT8 0x%x\n", has_amxint8);
} }
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) #endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
// defined(_M_X64)
} }
TEST_F(LibYUVBaseTest, TestCompilerMacros) { TEST_F(LibYUVBaseTest, TestCompilerMacros) {

View File

@ -1570,18 +1570,21 @@ static int TestCopyPlane(int benchmark_width,
// Disable all optimizations. // Disable all optimizations.
MaskCpuFlags(disable_cpu_flags); MaskCpuFlags(disable_cpu_flags);
for (int i = 0; i < benchmark_iterations; i++) { for (int i = 0; i < benchmark_iterations; i++) {
CopyPlane(orig_y + off, benchmark_width, dst_c, benchmark_width, benchmark_width, benchmark_height * invert); CopyPlane(orig_y + off, benchmark_width, dst_c, benchmark_width,
benchmark_width, benchmark_height * invert);
} }
// Enable optimizations. // Enable optimizations.
MaskCpuFlags(benchmark_cpu_info); MaskCpuFlags(benchmark_cpu_info);
for (int i = 0; i < benchmark_iterations; i++) { for (int i = 0; i < benchmark_iterations; i++) {
CopyPlane(orig_y + off, benchmark_width, dst_opt, benchmark_width, benchmark_width, benchmark_height * invert); CopyPlane(orig_y + off, benchmark_width, dst_opt, benchmark_width,
benchmark_width, benchmark_height * invert);
} }
int max_diff = 0; int max_diff = 0;
for (int i = 0; i < y_plane_size; ++i) { for (int i = 0; i < y_plane_size; ++i) {
int abs_diff = abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i])); int abs_diff =
abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i]));
if (abs_diff > max_diff) { if (abs_diff > max_diff) {
max_diff = abs_diff; max_diff = abs_diff;
} }
@ -2499,17 +2502,19 @@ static int TestHalfFloatPlane(int benchmark_width,
// Disable all optimizations. // Disable all optimizations.
MaskCpuFlags(disable_cpu_flags); MaskCpuFlags(disable_cpu_flags);
for (j = 0; j < benchmark_iterations; j++) { for (j = 0; j < benchmark_iterations; j++) {
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off), benchmark_width * 2, HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off),
reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2, benchmark_width * 2, reinterpret_cast<uint16_t*>(dst_c),
scale, benchmark_width, benchmark_height * invert); benchmark_width * 2, scale, benchmark_width,
benchmark_height * invert);
} }
// Enable optimizations. // Enable optimizations.
MaskCpuFlags(benchmark_cpu_info); MaskCpuFlags(benchmark_cpu_info);
for (j = 0; j < benchmark_iterations; j++) { for (j = 0; j < benchmark_iterations; j++) {
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off), benchmark_width * 2, HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off),
reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2, benchmark_width * 2, reinterpret_cast<uint16_t*>(dst_opt),
scale, benchmark_width, benchmark_height * invert); benchmark_width * 2, scale, benchmark_width,
benchmark_height * invert);
} }
int max_diff = 0; int max_diff = 0;
@ -2536,23 +2541,23 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_Opt) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_Opt) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 4095.0f, 4095, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4095.0f, 4095, +1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
@ -2564,37 +2569,37 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Any) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Any) {
int diff = TestHalfFloatPlane(benchmark_width_ + 1, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Unaligned) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Unaligned) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 2); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 2);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Invert) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Invert) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, -1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, -1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
@ -2605,8 +2610,7 @@ static void EnableFlushDenormalToZero(void) {
"vmrs %0, fpscr \n" "vmrs %0, fpscr \n"
"orr %0, %0, #0x1000000 \n" "orr %0, %0, #0x1000000 \n"
"vmsr fpscr, %0 \n" "vmsr fpscr, %0 \n"
: "=r"(cw) : "=r"(cw)::"memory", "cc"); // Clobber List
::"memory", "cc"); // Clobber List
} }
static void DisableFlushDenormalToZero(void) { static void DisableFlushDenormalToZero(void) {
@ -2615,8 +2619,7 @@ static void DisableFlushDenormalToZero(void) {
"vmrs %0, fpscr \n" "vmrs %0, fpscr \n"
"bic %0, %0, #0x1000000 \n" "bic %0, %0, #0x1000000 \n"
"vmsr fpscr, %0 \n" "vmsr fpscr, %0 \n"
: "=r"(cw) : "=r"(cw)::"memory", "cc"); // Clobber List
::"memory", "cc"); // Clobber List
} }
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes // 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
@ -2626,18 +2629,18 @@ static void DisableFlushDenormalToZero(void) {
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_flush_denormal) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_flush_denormal) {
// 32 bit arm rounding on denormal case is off by 1 compared to C. // 32 bit arm rounding on denormal case is off by 1 compared to C.
EnableFlushDenormalToZero(); EnableFlushDenormalToZero();
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
DisableFlushDenormalToZero(); DisableFlushDenormalToZero();
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) { TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) {
EnableFlushDenormalToZero(); EnableFlushDenormalToZero();
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_, int diff = TestHalfFloatPlane(
benchmark_iterations_, disable_cpu_flags_, benchmark_width_, benchmark_height_, benchmark_iterations_,
benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0); disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
DisableFlushDenormalToZero(); DisableFlushDenormalToZero();
EXPECT_EQ(0, diff); EXPECT_EQ(0, diff);
} }
@ -3184,8 +3187,9 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
tmp_pixels_c_b, benchmark_width_, benchmark_width_, tmp_pixels_c_b, benchmark_width_, benchmark_width_,
benchmark_height_); benchmark_height_);
MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g, MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g,
benchmark_width_, tmp_pixels_c_b, benchmark_width_, dst_pixels_c, benchmark_width_, tmp_pixels_c_b, benchmark_width_,
benchmark_width_ * 3, benchmark_width_, benchmark_height_); dst_pixels_c, benchmark_width_ * 3, benchmark_width_,
benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_); MaskCpuFlags(benchmark_cpu_info_);
SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_opt_r, SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_opt_r,
@ -3244,8 +3248,9 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
tmp_pixels_c_b, benchmark_width_, benchmark_width_, tmp_pixels_c_b, benchmark_width_, benchmark_width_,
benchmark_height_); benchmark_height_);
MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g, MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g,
benchmark_width_, tmp_pixels_c_b, benchmark_width_, dst_pixels_c, benchmark_width_, tmp_pixels_c_b, benchmark_width_,
benchmark_width_ * 3, benchmark_width_, benchmark_height_); dst_pixels_c, benchmark_width_ * 3, benchmark_width_,
benchmark_height_);
MaskCpuFlags(benchmark_cpu_info_); MaskCpuFlags(benchmark_cpu_info_);
for (int i = 0; i < benchmark_iterations_; ++i) { for (int i = 0; i < benchmark_iterations_; ++i) {
@ -3446,8 +3451,8 @@ TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
for (int i = 0; i < benchmark_iterations_; ++i) { for (int i = 0; i < benchmark_iterations_; ++i) {
MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g, MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g,
benchmark_width_, tmp_pixels_opt_b, benchmark_width_, NULL, 0, benchmark_width_, tmp_pixels_opt_b, benchmark_width_, NULL,
dst_pixels_opt, benchmark_width_ * 4, benchmark_width_, 0, dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
benchmark_height_); benchmark_height_);
} }
@ -3502,8 +3507,8 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
for (int i = 0; i < benchmark_iterations_; ++i) { for (int i = 0; i < benchmark_iterations_; ++i) {
SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_opt_r, SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_opt_r,
benchmark_width_, tmp_pixels_opt_g, benchmark_width_, benchmark_width_, tmp_pixels_opt_g, benchmark_width_,
tmp_pixels_opt_b, benchmark_width_, NULL, 0, benchmark_width_, tmp_pixels_opt_b, benchmark_width_, NULL, 0,
benchmark_height_); benchmark_width_, benchmark_height_);
} }
MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g, MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g,