mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Apply clang format
Bug: None
Change-Id: I0d9db4b384144523e61ae32b6ab3f72e93a0c265
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6138934
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
b5a18f9d93
commit
e0040eb318
@ -67,7 +67,6 @@ static const int kCpuHasLOONGARCH = 0x20;
|
|||||||
static const int kCpuHasLSX = 0x100;
|
static const int kCpuHasLSX = 0x100;
|
||||||
static const int kCpuHasLASX = 0x200;
|
static const int kCpuHasLASX = 0x200;
|
||||||
|
|
||||||
|
|
||||||
// Optional init function. TestCpuFlag does an auto-init.
|
// Optional init function. TestCpuFlag does an auto-init.
|
||||||
// Returns cpu_info flags.
|
// Returns cpu_info flags.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -499,8 +499,8 @@ static inline void I422ToRGB565Row_SVE_SC(
|
|||||||
// Calculate a predicate for the final iteration to deal with the tail.
|
// Calculate a predicate for the final iteration to deal with the tail.
|
||||||
"cnth %[vl] \n"
|
"cnth %[vl] \n"
|
||||||
"whilelt p1.b, wzr, %w[width] \n" //
|
"whilelt p1.b, wzr, %w[width] \n" //
|
||||||
READYUV422_SVE_2X I422TORGB_SVE_2X
|
READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
|
||||||
RGBTOARGB8_SVE_TOP_2X RGB8TORGB565_SVE_FROM_TOP_2X
|
RGB8TORGB565_SVE_FROM_TOP_2X
|
||||||
"st2h {z18.h, z19.h}, p1, [%[dst]] \n"
|
"st2h {z18.h, z19.h}, p1, [%[dst]] \n"
|
||||||
|
|
||||||
"99: \n"
|
"99: \n"
|
||||||
@ -558,8 +558,8 @@ static inline void I422ToARGB1555Row_SVE_SC(
|
|||||||
// Calculate a predicate for the final iteration to deal with the tail.
|
// Calculate a predicate for the final iteration to deal with the tail.
|
||||||
"cnth %[vl] \n"
|
"cnth %[vl] \n"
|
||||||
"whilelt p1.b, wzr, %w[width] \n" //
|
"whilelt p1.b, wzr, %w[width] \n" //
|
||||||
READYUV422_SVE_2X I422TORGB_SVE_2X
|
READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
|
||||||
RGBTOARGB8_SVE_TOP_2X RGB8TOARGB1555_SVE_FROM_TOP_2X
|
RGB8TOARGB1555_SVE_FROM_TOP_2X
|
||||||
"st2h {z0.h, z1.h}, p1, [%[dst]] \n"
|
"st2h {z0.h, z1.h}, p1, [%[dst]] \n"
|
||||||
|
|
||||||
"99: \n"
|
"99: \n"
|
||||||
@ -617,8 +617,8 @@ static inline void I422ToARGB4444Row_SVE_SC(
|
|||||||
// Calculate a predicate for the final iteration to deal with the tail.
|
// Calculate a predicate for the final iteration to deal with the tail.
|
||||||
"cnth %[vl] \n"
|
"cnth %[vl] \n"
|
||||||
"whilelt p1.b, wzr, %w[width] \n" //
|
"whilelt p1.b, wzr, %w[width] \n" //
|
||||||
READYUV422_SVE_2X I422TORGB_SVE_2X
|
READYUV422_SVE_2X I422TORGB_SVE_2X RGBTOARGB8_SVE_TOP_2X
|
||||||
RGBTOARGB8_SVE_TOP_2X RGB8TOARGB4444_SVE_FROM_TOP_2X
|
RGB8TOARGB4444_SVE_FROM_TOP_2X
|
||||||
"st2h {z0.h, z1.h}, p1, [%[dst]] \n"
|
"st2h {z0.h, z1.h}, p1, [%[dst]] \n"
|
||||||
|
|
||||||
"99: \n"
|
"99: \n"
|
||||||
|
|||||||
@ -70,9 +70,8 @@ int ConvertToARGB(const uint8_t* sample,
|
|||||||
uint8_t* rotate_buffer = NULL;
|
uint8_t* rotate_buffer = NULL;
|
||||||
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||||
|
|
||||||
if (dst_argb == NULL || sample == NULL ||
|
if (dst_argb == NULL || sample == NULL || src_width <= 0 ||
|
||||||
src_width <= 0 || src_width > INT_MAX / 4 ||
|
src_width > INT_MAX / 4 || crop_width <= 0 || crop_width > INT_MAX / 4 ||
|
||||||
crop_width <= 0 || crop_width > INT_MAX / 4 ||
|
|
||||||
src_height == 0 || crop_height == 0) {
|
src_height == 0 || crop_height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -81,7 +80,8 @@ int ConvertToARGB(const uint8_t* sample,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (need_buf) {
|
if (need_buf) {
|
||||||
const uint64_t rotate_buffer_size = (uint64_t)crop_width * 4 * abs_crop_height;
|
const uint64_t rotate_buffer_size =
|
||||||
|
(uint64_t)crop_width * 4 * abs_crop_height;
|
||||||
if (rotate_buffer_size > SIZE_MAX) {
|
if (rotate_buffer_size > SIZE_MAX) {
|
||||||
return -1; // Invalid size.
|
return -1; // Invalid size.
|
||||||
}
|
}
|
||||||
|
|||||||
@ -65,8 +65,9 @@ int ConvertToI420(const uint8_t* sample,
|
|||||||
const int inv_crop_height =
|
const int inv_crop_height =
|
||||||
(src_height < 0) ? -abs_crop_height : abs_crop_height;
|
(src_height < 0) ? -abs_crop_height : abs_crop_height;
|
||||||
|
|
||||||
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 || src_width > INT_MAX / 4 ||
|
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
|
||||||
crop_width <= 0 || src_height == 0 || crop_height == 0) {
|
src_width > INT_MAX / 4 || crop_width <= 0 || src_height == 0 ||
|
||||||
|
crop_height == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -78,7 +79,8 @@ int ConvertToI420(const uint8_t* sample,
|
|||||||
if (need_buf) {
|
if (need_buf) {
|
||||||
int y_size = crop_width * abs_crop_height;
|
int y_size = crop_width * abs_crop_height;
|
||||||
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
|
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
|
||||||
const uint64_t rotate_buffer_size = (uint64_t)y_size + (uint64_t)uv_size * 2;
|
const uint64_t rotate_buffer_size =
|
||||||
|
(uint64_t)y_size + (uint64_t)uv_size * 2;
|
||||||
if (rotate_buffer_size > SIZE_MAX) {
|
if (rotate_buffer_size > SIZE_MAX) {
|
||||||
return -1; // Invalid size.
|
return -1; // Invalid size.
|
||||||
}
|
}
|
||||||
|
|||||||
@ -191,7 +191,8 @@ static int ARGBRotate180(const uint8_t* src_argb,
|
|||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_AVX512BW)
|
#if defined(HAS_COPYROW_AVX512BW)
|
||||||
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
if (TestCpuFlag(kCpuHasAVX512BW)) {
|
||||||
CopyRow = IS_ALIGNED(width * 4, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
|
CopyRow =
|
||||||
|
IS_ALIGNED(width * 4, 128) ? CopyRow_AVX512BW : CopyRow_Any_AVX512BW;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if defined(HAS_COPYROW_ERMS)
|
#if defined(HAS_COPYROW_ERMS)
|
||||||
|
|||||||
@ -475,8 +475,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
asm volatile (
|
asm volatile("movdqa %3,%%xmm6 \n"
|
||||||
"movdqa %3,%%xmm6 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -509,12 +508,12 @@ void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleMaskARGBToRGB24) // %3
|
: "m"(kShuffleMaskARGBToRGB24) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
|
||||||
|
"xmm6");
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
asm volatile (
|
asm volatile("movdqa %3,%%xmm6 \n"
|
||||||
"movdqa %3,%%xmm6 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -547,7 +546,8 @@ void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kShuffleMaskARGBToRAW) // %3
|
: "m"(kShuffleMaskARGBToRAW) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
|
||||||
|
"xmm6");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_ARGBTORGB24ROW_AVX2
|
#ifdef HAS_ARGBTORGB24ROW_AVX2
|
||||||
@ -1159,8 +1159,7 @@ void AR64ToARGBRow_SSSE3(const uint16_t* src_ar64,
|
|||||||
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
|
void AB64ToARGBRow_SSSE3(const uint16_t* src_ab64,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("movdqa %3,%%xmm2 \n"
|
||||||
"movdqa %3,%%xmm2 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -1265,8 +1264,7 @@ void AR64ToARGBRow_AVX2(const uint16_t* src_ar64,
|
|||||||
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
|
void AB64ToARGBRow_AVX2(const uint16_t* src_ab64,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("vbroadcastf128 %3,%%ymm2 \n" LABELALIGN
|
||||||
"vbroadcastf128 %3,%%ymm2 \n" LABELALIGN
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vmovdqu (%0),%%ymm0 \n"
|
"vmovdqu (%0),%%ymm0 \n"
|
||||||
"vmovdqu 0x20(%0),%%ymm1 \n"
|
"vmovdqu 0x20(%0),%%ymm1 \n"
|
||||||
@ -2657,7 +2655,8 @@ void OMITFP I444AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA444
|
LABELALIGN "1: \n" READYUVA444
|
||||||
@ -2983,7 +2982,8 @@ void OMITFP I210AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA210
|
LABELALIGN "1: \n" READYUVA210
|
||||||
@ -3015,7 +3015,8 @@ void OMITFP I410AlphaToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA410
|
LABELALIGN "1: \n" READYUVA410
|
||||||
@ -3081,7 +3082,8 @@ void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA422
|
LABELALIGN "1: \n" READYUVA422
|
||||||
@ -3109,7 +3111,8 @@ void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READNV12
|
LABELALIGN "1: \n" READNV12
|
||||||
@ -3130,7 +3133,8 @@ void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READNV21
|
LABELALIGN "1: \n" READNV21
|
||||||
@ -3196,7 +3200,8 @@ void OMITFP P210ToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READP210
|
LABELALIGN "1: \n" READP210
|
||||||
@ -3217,7 +3222,8 @@ void OMITFP P410ToARGBRow_SSSE3(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP(
|
||||||
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READP410
|
LABELALIGN "1: \n" READP410
|
||||||
@ -4051,7 +4057,8 @@ void OMITFP I210AlphaToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA210_AVX2
|
LABELALIGN "1: \n" READYUVA210_AVX2
|
||||||
@ -4086,7 +4093,8 @@ void OMITFP I410AlphaToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA410_AVX2
|
LABELALIGN "1: \n" READYUVA410_AVX2
|
||||||
@ -4161,7 +4169,8 @@ void OMITFP I444AlphaToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA444_AVX2
|
LABELALIGN "1: \n" READYUVA444_AVX2
|
||||||
@ -4195,7 +4204,8 @@ void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
yuvconstants) "sub %[u_buf],%[v_buf] \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READYUVA422_AVX2
|
LABELALIGN "1: \n" READYUVA422_AVX2
|
||||||
@ -4271,7 +4281,8 @@ void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READNV12_AVX2
|
LABELALIGN "1: \n" READNV12_AVX2
|
||||||
@ -4297,7 +4308,8 @@ void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READNV21_AVX2
|
LABELALIGN "1: \n" READNV21_AVX2
|
||||||
@ -4378,7 +4390,8 @@ void OMITFP P210ToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READP210_AVX2
|
LABELALIGN "1: \n" READP210_AVX2
|
||||||
@ -4404,7 +4417,8 @@ void OMITFP P410ToARGBRow_AVX2(const uint16_t* y_buf,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP_AVX2(
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP_AVX2(
|
||||||
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
yuvconstants) "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
|
|
||||||
LABELALIGN "1: \n" READP410_AVX2
|
LABELALIGN "1: \n" READP410_AVX2
|
||||||
@ -4590,8 +4604,7 @@ static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
|
|||||||
|
|
||||||
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
asm volatile (
|
asm volatile("movdqa %3,%%xmm5 \n"
|
||||||
"movdqa %3,%%xmm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -4612,8 +4625,7 @@ void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
#ifdef HAS_MIRRORROW_AVX2
|
#ifdef HAS_MIRRORROW_AVX2
|
||||||
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
asm volatile (
|
asm volatile("vbroadcastf128 %3,%%ymm5 \n"
|
||||||
"vbroadcastf128 %3,%%ymm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -4640,8 +4652,7 @@ static const uvec8 kShuffleMirrorUV = {14u, 15u, 12u, 13u, 10u, 11u, 8u, 9u,
|
|||||||
|
|
||||||
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
asm volatile (
|
asm volatile("movdqa %3,%%xmm5 \n"
|
||||||
"movdqa %3,%%xmm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -4662,8 +4673,7 @@ void MirrorUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
|||||||
#ifdef HAS_MIRRORUVROW_AVX2
|
#ifdef HAS_MIRRORUVROW_AVX2
|
||||||
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
void MirrorUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
asm volatile (
|
asm volatile("vbroadcastf128 %3,%%ymm5 \n"
|
||||||
"vbroadcastf128 %3,%%ymm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -4767,8 +4777,7 @@ void RGB24MirrorRow_SSSE3(const uint8_t* src_rgb24,
|
|||||||
|
|
||||||
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
|
void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
asm volatile (
|
asm volatile("lea -0x10(%0,%2,4),%0 \n"
|
||||||
"lea -0x10(%0,%2,4),%0 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -4792,8 +4801,7 @@ void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
|
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
|
||||||
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
intptr_t temp_width = (intptr_t)(width);
|
intptr_t temp_width = (intptr_t)(width);
|
||||||
asm volatile (
|
asm volatile("vmovdqu %3,%%ymm5 \n"
|
||||||
"vmovdqu %3,%%ymm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -5026,8 +5034,7 @@ void MergeUVRow_AVX512BW(const uint8_t* src_u,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %0,%1 \n"
|
||||||
"sub %0,%1 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -5055,8 +5062,7 @@ void MergeUVRow_AVX2(const uint8_t* src_u,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %0,%1 \n"
|
||||||
"sub %0,%1 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -5084,8 +5090,7 @@ void MergeUVRow_SSE2(const uint8_t* src_u,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %0,%1 \n"
|
||||||
"sub %0,%1 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -5334,8 +5339,7 @@ void Convert16To8Row_AVX512BW(const uint16_t* src_y,
|
|||||||
uint8_t* dst_y,
|
uint8_t* dst_y,
|
||||||
int scale,
|
int scale,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("vpbroadcastw %3,%%zmm2 \n"
|
||||||
"vpbroadcastw %3,%%zmm2 \n"
|
|
||||||
|
|
||||||
// 64 pixels per loop.
|
// 64 pixels per loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
@ -5514,8 +5518,11 @@ static const uvec8 kSplitRGBShuffleSSE41[5] = {
|
|||||||
{0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u},
|
{0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u, 0u, 128u, 0u},
|
||||||
};
|
};
|
||||||
|
|
||||||
void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
|
void SplitRGBRow_SSE41(const uint8_t* src_rgb,
|
||||||
uint8_t* dst_g, uint8_t* dst_b, int width) {
|
uint8_t* dst_r,
|
||||||
|
uint8_t* dst_g,
|
||||||
|
uint8_t* dst_b,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"movdqa 48(%5), %%xmm0 \n"
|
"movdqa 48(%5), %%xmm0 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -5554,8 +5561,11 @@ void SplitRGBRow_SSE41(const uint8_t* src_rgb, uint8_t* dst_r,
|
|||||||
#endif // HAS_SPLITRGBROW_SSE41
|
#endif // HAS_SPLITRGBROW_SSE41
|
||||||
|
|
||||||
#ifdef HAS_SPLITRGBROW_AVX2
|
#ifdef HAS_SPLITRGBROW_AVX2
|
||||||
void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
|
void SplitRGBRow_AVX2(const uint8_t* src_rgb,
|
||||||
uint8_t* dst_g, uint8_t* dst_b, int width) {
|
uint8_t* dst_r,
|
||||||
|
uint8_t* dst_g,
|
||||||
|
uint8_t* dst_b,
|
||||||
|
int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vbroadcasti128 48(%5), %%ymm0 \n"
|
"vbroadcasti128 48(%5), %%ymm0 \n"
|
||||||
"vbroadcasti128 64(%5), %%ymm7 \n"
|
"vbroadcasti128 64(%5), %%ymm7 \n"
|
||||||
@ -5607,7 +5617,8 @@ void SplitRGBRow_AVX2(const uint8_t* src_rgb, uint8_t* dst_r,
|
|||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||||
"xmm7"
|
"xmm7"
|
||||||
#if defined(__x86_64__)
|
#if defined(__x86_64__)
|
||||||
, "xmm8", "xmm9", "xmm10"
|
,
|
||||||
|
"xmm8", "xmm9", "xmm10"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -6530,8 +6541,7 @@ void CopyRow_AVX512BW(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
// Multiple of 1.
|
// Multiple of 1.
|
||||||
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
|
void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
size_t width_tmp = (size_t)(width);
|
size_t width_tmp = (size_t)(width);
|
||||||
asm volatile (
|
asm volatile("rep movsb \n"
|
||||||
"rep movsb \n"
|
|
||||||
: "+S"(src), // %0
|
: "+S"(src), // %0
|
||||||
"+D"(dst), // %1
|
"+D"(dst), // %1
|
||||||
"+c"(width_tmp) // %2
|
"+c"(width_tmp) // %2
|
||||||
@ -6741,8 +6751,7 @@ void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
|
void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
|
||||||
size_t width_tmp = (size_t)(width >> 2);
|
size_t width_tmp = (size_t)(width >> 2);
|
||||||
const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
|
const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes.
|
||||||
asm volatile (
|
asm volatile("rep stosl \n"
|
||||||
"rep stosl \n"
|
|
||||||
: "+D"(dst), // %0
|
: "+D"(dst), // %0
|
||||||
"+c"(width_tmp) // %1
|
"+c"(width_tmp) // %1
|
||||||
: "a"(v32) // %2
|
: "a"(v32) // %2
|
||||||
@ -6751,8 +6760,7 @@ void SetRow_X86(uint8_t* dst, uint8_t v8, int width) {
|
|||||||
|
|
||||||
void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
|
void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
|
||||||
size_t width_tmp = (size_t)(width);
|
size_t width_tmp = (size_t)(width);
|
||||||
asm volatile (
|
asm volatile("rep stosb \n"
|
||||||
"rep stosb \n"
|
|
||||||
: "+D"(dst), // %0
|
: "+D"(dst), // %0
|
||||||
"+c"(width_tmp) // %1
|
"+c"(width_tmp) // %1
|
||||||
: "a"(v8) // %2
|
: "a"(v8) // %2
|
||||||
@ -6761,8 +6769,7 @@ void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) {
|
|||||||
|
|
||||||
void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) {
|
void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) {
|
||||||
size_t width_tmp = (size_t)(width);
|
size_t width_tmp = (size_t)(width);
|
||||||
asm volatile (
|
asm volatile("rep stosl \n"
|
||||||
"rep stosl \n"
|
|
||||||
: "+D"(dst_argb), // %0
|
: "+D"(dst_argb), // %0
|
||||||
"+c"(width_tmp) // %1
|
"+c"(width_tmp) // %1
|
||||||
: "a"(v32) // %2
|
: "a"(v32) // %2
|
||||||
@ -7923,8 +7930,7 @@ void ARGBMultiplyRow_SSE2(const uint8_t* src_argb,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("pxor %%xmm5,%%xmm5 \n"
|
||||||
"pxor %%xmm5,%%xmm5 \n"
|
|
||||||
|
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
@ -7961,8 +7967,7 @@ void ARGBMultiplyRow_AVX2(const uint8_t* src_argb,
|
|||||||
const uint8_t* src_argb1,
|
const uint8_t* src_argb1,
|
||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
|
||||||
|
|
||||||
// 4 pixel loop.
|
// 4 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
@ -8809,8 +8814,7 @@ void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const uint8_t* shuffler,
|
const uint8_t* shuffler,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("movdqu (%3),%%xmm5 \n"
|
||||||
"movdqu (%3),%%xmm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -8838,8 +8842,7 @@ void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const uint8_t* shuffler,
|
const uint8_t* shuffler,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("vbroadcastf128 (%3),%%ymm5 \n"
|
||||||
"vbroadcastf128 (%3),%%ymm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -8868,8 +8871,7 @@ void I422ToYUY2Row_SSE2(const uint8_t* src_y,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_yuy2,
|
uint8_t* dst_yuy2,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %1,%2 \n"
|
||||||
"sub %1,%2 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -8903,8 +8905,7 @@ void I422ToUYVYRow_SSE2(const uint8_t* src_y,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_uyvy,
|
uint8_t* dst_uyvy,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %1,%2 \n"
|
||||||
"sub %1,%2 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -8938,8 +8939,7 @@ void I422ToYUY2Row_AVX2(const uint8_t* src_y,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_yuy2,
|
uint8_t* dst_yuy2,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %1,%2 \n"
|
||||||
"sub %1,%2 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -8976,8 +8976,7 @@ void I422ToUYVYRow_AVX2(const uint8_t* src_y,
|
|||||||
const uint8_t* src_v,
|
const uint8_t* src_v,
|
||||||
uint8_t* dst_uyvy,
|
uint8_t* dst_uyvy,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("sub %1,%2 \n"
|
||||||
"sub %1,%2 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -9013,8 +9012,7 @@ void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const float* poly,
|
const float* poly,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (
|
asm volatile("pxor %%xmm3,%%xmm3 \n"
|
||||||
"pxor %%xmm3,%%xmm3 \n"
|
|
||||||
|
|
||||||
// 2 pixel loop.
|
// 2 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
@ -9059,7 +9057,8 @@ void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
|
|||||||
"+r"(dst_argb), // %1
|
"+r"(dst_argb), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"(poly) // %3
|
: "r"(poly) // %3
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
|
||||||
|
"xmm6");
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
|
#endif // HAS_ARGBPOLYNOMIALROW_SSE2
|
||||||
|
|
||||||
@ -9551,8 +9550,7 @@ static const uvec8 kShuffleUVToVU = {1u, 0u, 3u, 2u, 5u, 4u, 7u, 6u,
|
|||||||
|
|
||||||
// Convert UV plane of NV12 to VU of NV21.
|
// Convert UV plane of NV12 to VU of NV21.
|
||||||
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
|
void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
|
||||||
asm volatile (
|
asm volatile("movdqu %3,%%xmm5 \n"
|
||||||
"movdqu %3,%%xmm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
@ -9576,8 +9574,7 @@ void SwapUVRow_SSSE3(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
|
|||||||
|
|
||||||
#ifdef HAS_SWAPUVROW_AVX2
|
#ifdef HAS_SWAPUVROW_AVX2
|
||||||
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
|
void SwapUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
|
||||||
asm volatile (
|
asm volatile("vbroadcastf128 %3,%%ymm5 \n"
|
||||||
"vbroadcastf128 %3,%%ymm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
|||||||
@ -290,7 +290,8 @@ void I210ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
uint16_t alpha = 0xc000;
|
uint16_t alpha = 0xc000;
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"dup v23.8h, %w[alpha] \n"
|
"dup v23.8h, %w[alpha] \n"
|
||||||
"1: \n" READYUV210 NVTORGB
|
"1: \n" READYUV210 NVTORGB
|
||||||
@ -318,7 +319,8 @@ void I410ToAR30Row_NEON(const uint16_t* src_y,
|
|||||||
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
const vec16* rgb_coeff = &yuvconstants->kRGBCoeffBias;
|
||||||
uint16_t limit = 0x3ff0;
|
uint16_t limit = 0x3ff0;
|
||||||
uint16_t alpha = 0xc000;
|
uint16_t alpha = 0xc000;
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"dup v22.8h, %w[limit] \n"
|
"dup v22.8h, %w[limit] \n"
|
||||||
"dup v23.8h, %w[alpha] \n"
|
"dup v23.8h, %w[alpha] \n"
|
||||||
"1: \n" READYUV410 NVTORGB
|
"1: \n" READYUV410 NVTORGB
|
||||||
@ -369,7 +371,8 @@ void I210ToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"1: \n" READYUV210 NVTORGB RGBTORGB8
|
"1: \n" READYUV210 NVTORGB RGBTORGB8
|
||||||
"subs %w[width], %w[width], #8 \n"
|
"subs %w[width], %w[width], #8 \n"
|
||||||
@ -391,7 +394,8 @@ void I410ToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"movi v19.8b, #255 \n"
|
"movi v19.8b, #255 \n"
|
||||||
"1: \n" READYUV410 NVTORGB RGBTORGB8
|
"1: \n" READYUV410 NVTORGB RGBTORGB8
|
||||||
"subs %w[width], %w[width], #8 \n"
|
"subs %w[width], %w[width], #8 \n"
|
||||||
@ -626,7 +630,8 @@ void I410AlphaToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV410
|
"ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV410
|
||||||
"uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8
|
"uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8
|
||||||
@ -651,7 +656,8 @@ void I210AlphaToARGBRow_NEON(const uint16_t* src_y,
|
|||||||
uint8_t* dst_argb,
|
uint8_t* dst_argb,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV210
|
"ld1 {v19.16b}, [%[src_a]], #16 \n" READYUV210
|
||||||
"uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8
|
"uqshrn v19.8b, v19.8h, #2 \n" NVTORGB RGBTORGB8
|
||||||
@ -807,7 +813,8 @@ void I422ToARGB1555Row_NEON(const uint8_t* src_y,
|
|||||||
uint8_t* dst_argb1555,
|
uint8_t* dst_argb1555,
|
||||||
const struct YuvConstants* yuvconstants,
|
const struct YuvConstants* yuvconstants,
|
||||||
int width) {
|
int width) {
|
||||||
asm volatile (YUVTORGB_SETUP
|
asm volatile(
|
||||||
|
YUVTORGB_SETUP
|
||||||
"movi v19.8h, #0x80, lsl #8 \n"
|
"movi v19.8h, #0x80, lsl #8 \n"
|
||||||
"1: \n" //
|
"1: \n" //
|
||||||
READYUV422 I4XXTORGB RGBTORGB8_TOP
|
READYUV422 I4XXTORGB RGBTORGB8_TOP
|
||||||
|
|||||||
@ -10,8 +10,8 @@
|
|||||||
|
|
||||||
#include "libyuv/scale.h"
|
#include "libyuv/scale.h"
|
||||||
|
|
||||||
#include <limits.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <limits.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@ -1233,10 +1233,9 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
|
|||||||
(void)src_fourcc; // TODO(fbarchard): implement and/or assert.
|
(void)src_fourcc; // TODO(fbarchard): implement and/or assert.
|
||||||
(void)dst_fourcc;
|
(void)dst_fourcc;
|
||||||
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||||
if (!src_y || !src_u || !src_v || !dst_argb ||
|
if (!src_y || !src_u || !src_v || !dst_argb || src_width <= 0 ||
|
||||||
src_width <= 0 || src_width > INT_MAX / 4 || src_height == 0 ||
|
src_width > INT_MAX / 4 || src_height == 0 || dst_width <= 0 ||
|
||||||
dst_width <= 0 || dst_height <= 0 ||
|
dst_height <= 0 || clip_width <= 0 || clip_height <= 0) {
|
||||||
clip_width <= 0 || clip_height <= 0) {
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
const uint64_t argb_buffer_size = (uint64_t)src_width * abs_src_height * 4;
|
const uint64_t argb_buffer_size = (uint64_t)src_width * abs_src_height * 4;
|
||||||
@ -1250,9 +1249,9 @@ int YUVToARGBScaleClip(const uint8_t* src_y,
|
|||||||
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
||||||
argb_buffer, src_width * 4, src_width, src_height);
|
argb_buffer, src_width * 4, src_width, src_height);
|
||||||
|
|
||||||
r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, abs_src_height, dst_argb,
|
r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, abs_src_height,
|
||||||
dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
|
dst_argb, dst_stride_argb, dst_width, dst_height, clip_x,
|
||||||
clip_width, clip_height, filtering);
|
clip_y, clip_width, clip_height, filtering);
|
||||||
free(argb_buffer);
|
free(argb_buffer);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1761,8 +1761,7 @@ void ScaleRowUp2_Bilinear_16_AVX2(const uint16_t* src_ptr,
|
|||||||
void ScaleAddRow_SSE2(const uint8_t* src_ptr,
|
void ScaleAddRow_SSE2(const uint8_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int src_width) {
|
int src_width) {
|
||||||
asm volatile (
|
asm volatile("pxor %%xmm5,%%xmm5 \n"
|
||||||
"pxor %%xmm5,%%xmm5 \n"
|
|
||||||
|
|
||||||
// 16 pixel loop.
|
// 16 pixel loop.
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
@ -1793,8 +1792,7 @@ void ScaleAddRow_SSE2(const uint8_t* src_ptr,
|
|||||||
void ScaleAddRow_AVX2(const uint8_t* src_ptr,
|
void ScaleAddRow_AVX2(const uint8_t* src_ptr,
|
||||||
uint16_t* dst_ptr,
|
uint16_t* dst_ptr,
|
||||||
int src_width) {
|
int src_width) {
|
||||||
asm volatile (
|
asm volatile("vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
||||||
"vpxor %%ymm5,%%ymm5,%%ymm5 \n"
|
|
||||||
|
|
||||||
LABELALIGN
|
LABELALIGN
|
||||||
"1: \n"
|
"1: \n"
|
||||||
|
|||||||
@ -1178,12 +1178,8 @@ void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb,
|
|||||||
"stp w10, w11, [%[dst]], #8 \n"
|
"stp w10, w11, [%[dst]], #8 \n"
|
||||||
"stp w12, w13, [%[dst]], #8 \n"
|
"stp w12, w13, [%[dst]], #8 \n"
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: [src]"+r"(src_argb),
|
: [src] "+r"(src_argb), [src1] "+r"(src_argb1), [src2] "+r"(src_argb2),
|
||||||
[src1]"+r"(src_argb1),
|
[src3] "+r"(src_argb3), [dst] "+r"(dst_argb), [width] "+r"(dst_width),
|
||||||
[src2]"+r"(src_argb2),
|
|
||||||
[src3]"+r"(src_argb3),
|
|
||||||
[dst]"+r"(dst_argb),
|
|
||||||
[width]"+r"(dst_width),
|
|
||||||
[i] "+r"(i)
|
[i] "+r"(i)
|
||||||
: [step] "r"((int64_t)(src_stepx * 16))
|
: [step] "r"((int64_t)(src_stepx * 16))
|
||||||
: "memory", "cc", "w10", "w11", "w12", "w13");
|
: "memory", "cc", "w10", "w11", "w12", "w13");
|
||||||
|
|||||||
@ -10,8 +10,8 @@
|
|||||||
|
|
||||||
#include "libyuv/scale.h" /* For FilterMode */
|
#include "libyuv/scale.h" /* For FilterMode */
|
||||||
|
|
||||||
#include <limits.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <limits.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@ -41,9 +41,9 @@ int RGBScale(const uint8_t* src_rgb,
|
|||||||
int dst_height,
|
int dst_height,
|
||||||
enum FilterMode filtering) {
|
enum FilterMode filtering) {
|
||||||
int r;
|
int r;
|
||||||
if (!src_rgb || !dst_rgb ||
|
if (!src_rgb || !dst_rgb || src_width <= 0 || src_width > INT_MAX / 4 ||
|
||||||
src_width <= 0 || src_width > INT_MAX / 4 || src_height == 0 ||
|
src_height == 0 || dst_width <= 0 || dst_width > INT_MAX / 4 ||
|
||||||
dst_width <= 0 || dst_width > INT_MAX / 4 || dst_height <= 0) {
|
dst_height <= 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||||
|
|||||||
@ -15,7 +15,6 @@ namespace libyuv {
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \
|
#if !defined(LIBYUV_DISABLE_SME) && defined(CLANG_HAS_SME) && \
|
||||||
defined(__aarch64__)
|
defined(__aarch64__)
|
||||||
|
|
||||||
|
|||||||
@ -12,6 +12,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#include "../unit_test/unit_test.h"
|
||||||
#include "libyuv/basic_types.h"
|
#include "libyuv/basic_types.h"
|
||||||
#include "libyuv/compare.h"
|
#include "libyuv/compare.h"
|
||||||
#include "libyuv/convert.h"
|
#include "libyuv/convert.h"
|
||||||
@ -19,7 +20,6 @@
|
|||||||
#include "libyuv/convert_from.h"
|
#include "libyuv/convert_from.h"
|
||||||
#include "libyuv/convert_from_argb.h"
|
#include "libyuv/convert_from_argb.h"
|
||||||
#include "libyuv/cpu_id.h"
|
#include "libyuv/cpu_id.h"
|
||||||
#include "../unit_test/unit_test.h"
|
|
||||||
#include "libyuv/planar_functions.h"
|
#include "libyuv/planar_functions.h"
|
||||||
#include "libyuv/rotate.h"
|
#include "libyuv/rotate.h"
|
||||||
#include "libyuv/video_common.h"
|
#include "libyuv/video_common.h"
|
||||||
|
|||||||
@ -169,8 +169,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
|||||||
}
|
}
|
||||||
#endif // defined(__loongarch__)
|
#endif // defined(__loongarch__)
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__) || \
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || \
|
||||||
defined(_M_IX86) || defined(_M_X64)
|
defined(_M_X64)
|
||||||
int has_x86 = TestCpuFlag(kCpuHasX86);
|
int has_x86 = TestCpuFlag(kCpuHasX86);
|
||||||
if (has_x86) {
|
if (has_x86) {
|
||||||
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
int has_sse2 = TestCpuFlag(kCpuHasSSE2);
|
||||||
@ -215,7 +215,8 @@ TEST_F(LibYUVBaseTest, TestCpuHas) {
|
|||||||
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
|
printf("Has AVXVNNIINT8 0x%x\n", has_avxvnniint8);
|
||||||
printf("Has AMXINT8 0x%x\n", has_amxint8);
|
printf("Has AMXINT8 0x%x\n", has_amxint8);
|
||||||
}
|
}
|
||||||
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) ||
|
||||||
|
// defined(_M_X64)
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVBaseTest, TestCompilerMacros) {
|
TEST_F(LibYUVBaseTest, TestCompilerMacros) {
|
||||||
|
|||||||
@ -1570,18 +1570,21 @@ static int TestCopyPlane(int benchmark_width,
|
|||||||
// Disable all optimizations.
|
// Disable all optimizations.
|
||||||
MaskCpuFlags(disable_cpu_flags);
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
for (int i = 0; i < benchmark_iterations; i++) {
|
for (int i = 0; i < benchmark_iterations; i++) {
|
||||||
CopyPlane(orig_y + off, benchmark_width, dst_c, benchmark_width, benchmark_width, benchmark_height * invert);
|
CopyPlane(orig_y + off, benchmark_width, dst_c, benchmark_width,
|
||||||
|
benchmark_width, benchmark_height * invert);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable optimizations.
|
// Enable optimizations.
|
||||||
MaskCpuFlags(benchmark_cpu_info);
|
MaskCpuFlags(benchmark_cpu_info);
|
||||||
for (int i = 0; i < benchmark_iterations; i++) {
|
for (int i = 0; i < benchmark_iterations; i++) {
|
||||||
CopyPlane(orig_y + off, benchmark_width, dst_opt, benchmark_width, benchmark_width, benchmark_height * invert);
|
CopyPlane(orig_y + off, benchmark_width, dst_opt, benchmark_width,
|
||||||
|
benchmark_width, benchmark_height * invert);
|
||||||
}
|
}
|
||||||
|
|
||||||
int max_diff = 0;
|
int max_diff = 0;
|
||||||
for (int i = 0; i < y_plane_size; ++i) {
|
for (int i = 0; i < y_plane_size; ++i) {
|
||||||
int abs_diff = abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i]));
|
int abs_diff =
|
||||||
|
abs(static_cast<int>(dst_c[i]) - static_cast<int>(dst_opt[i]));
|
||||||
if (abs_diff > max_diff) {
|
if (abs_diff > max_diff) {
|
||||||
max_diff = abs_diff;
|
max_diff = abs_diff;
|
||||||
}
|
}
|
||||||
@ -2499,17 +2502,19 @@ static int TestHalfFloatPlane(int benchmark_width,
|
|||||||
// Disable all optimizations.
|
// Disable all optimizations.
|
||||||
MaskCpuFlags(disable_cpu_flags);
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
for (j = 0; j < benchmark_iterations; j++) {
|
for (j = 0; j < benchmark_iterations; j++) {
|
||||||
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off), benchmark_width * 2,
|
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off),
|
||||||
reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
|
benchmark_width * 2, reinterpret_cast<uint16_t*>(dst_c),
|
||||||
scale, benchmark_width, benchmark_height * invert);
|
benchmark_width * 2, scale, benchmark_width,
|
||||||
|
benchmark_height * invert);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enable optimizations.
|
// Enable optimizations.
|
||||||
MaskCpuFlags(benchmark_cpu_info);
|
MaskCpuFlags(benchmark_cpu_info);
|
||||||
for (j = 0; j < benchmark_iterations; j++) {
|
for (j = 0; j < benchmark_iterations; j++) {
|
||||||
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off), benchmark_width * 2,
|
HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off),
|
||||||
reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
|
benchmark_width * 2, reinterpret_cast<uint16_t*>(dst_opt),
|
||||||
scale, benchmark_width, benchmark_height * invert);
|
benchmark_width * 2, scale, benchmark_width,
|
||||||
|
benchmark_height * invert);
|
||||||
}
|
}
|
||||||
|
|
||||||
int max_diff = 0;
|
int max_diff = 0;
|
||||||
@ -2536,23 +2541,23 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_Opt) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 4095.0f, 4095, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4095.0f, 4095, +1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2564,37 +2569,37 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Any) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Any) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_ + 1, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_ + 1, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Unaligned) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Unaligned) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 2);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 2);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Invert) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Invert) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, -1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, -1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2605,8 +2610,7 @@ static void EnableFlushDenormalToZero(void) {
|
|||||||
"vmrs %0, fpscr \n"
|
"vmrs %0, fpscr \n"
|
||||||
"orr %0, %0, #0x1000000 \n"
|
"orr %0, %0, #0x1000000 \n"
|
||||||
"vmsr fpscr, %0 \n"
|
"vmsr fpscr, %0 \n"
|
||||||
: "=r"(cw)
|
: "=r"(cw)::"memory", "cc"); // Clobber List
|
||||||
::"memory", "cc"); // Clobber List
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DisableFlushDenormalToZero(void) {
|
static void DisableFlushDenormalToZero(void) {
|
||||||
@ -2615,8 +2619,7 @@ static void DisableFlushDenormalToZero(void) {
|
|||||||
"vmrs %0, fpscr \n"
|
"vmrs %0, fpscr \n"
|
||||||
"bic %0, %0, #0x1000000 \n"
|
"bic %0, %0, #0x1000000 \n"
|
||||||
"vmsr fpscr, %0 \n"
|
"vmsr fpscr, %0 \n"
|
||||||
: "=r"(cw)
|
: "=r"(cw)::"memory", "cc"); // Clobber List
|
||||||
::"memory", "cc"); // Clobber List
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
|
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
|
||||||
@ -2626,18 +2629,18 @@ static void DisableFlushDenormalToZero(void) {
|
|||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_flush_denormal) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_flush_denormal) {
|
||||||
// 32 bit arm rounding on denormal case is off by 1 compared to C.
|
// 32 bit arm rounding on denormal case is off by 1 compared to C.
|
||||||
EnableFlushDenormalToZero();
|
EnableFlushDenormalToZero();
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
|
||||||
DisableFlushDenormalToZero();
|
DisableFlushDenormalToZero();
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) {
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) {
|
||||||
EnableFlushDenormalToZero();
|
EnableFlushDenormalToZero();
|
||||||
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
int diff = TestHalfFloatPlane(
|
||||||
benchmark_iterations_, disable_cpu_flags_,
|
benchmark_width_, benchmark_height_, benchmark_iterations_,
|
||||||
benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
|
disable_cpu_flags_, benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
|
||||||
DisableFlushDenormalToZero();
|
DisableFlushDenormalToZero();
|
||||||
EXPECT_EQ(0, diff);
|
EXPECT_EQ(0, diff);
|
||||||
}
|
}
|
||||||
@ -3184,8 +3187,9 @@ TEST_F(LibYUVPlanarTest, MergeRGBPlane_Opt) {
|
|||||||
tmp_pixels_c_b, benchmark_width_, benchmark_width_,
|
tmp_pixels_c_b, benchmark_width_, benchmark_width_,
|
||||||
benchmark_height_);
|
benchmark_height_);
|
||||||
MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g,
|
MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g,
|
||||||
benchmark_width_, tmp_pixels_c_b, benchmark_width_, dst_pixels_c,
|
benchmark_width_, tmp_pixels_c_b, benchmark_width_,
|
||||||
benchmark_width_ * 3, benchmark_width_, benchmark_height_);
|
dst_pixels_c, benchmark_width_ * 3, benchmark_width_,
|
||||||
|
benchmark_height_);
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info_);
|
||||||
|
|
||||||
SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_opt_r,
|
SplitRGBPlane(src_pixels, benchmark_width_ * 3, tmp_pixels_opt_r,
|
||||||
@ -3244,8 +3248,9 @@ TEST_F(LibYUVPlanarTest, SplitRGBPlane_Opt) {
|
|||||||
tmp_pixels_c_b, benchmark_width_, benchmark_width_,
|
tmp_pixels_c_b, benchmark_width_, benchmark_width_,
|
||||||
benchmark_height_);
|
benchmark_height_);
|
||||||
MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g,
|
MergeRGBPlane(tmp_pixels_c_r, benchmark_width_, tmp_pixels_c_g,
|
||||||
benchmark_width_, tmp_pixels_c_b, benchmark_width_, dst_pixels_c,
|
benchmark_width_, tmp_pixels_c_b, benchmark_width_,
|
||||||
benchmark_width_ * 3, benchmark_width_, benchmark_height_);
|
dst_pixels_c, benchmark_width_ * 3, benchmark_width_,
|
||||||
|
benchmark_height_);
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info_);
|
||||||
|
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
@ -3446,8 +3451,8 @@ TEST_F(LibYUVPlanarTest, MergeXRGBPlane_Opt) {
|
|||||||
|
|
||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g,
|
MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g,
|
||||||
benchmark_width_, tmp_pixels_opt_b, benchmark_width_, NULL, 0,
|
benchmark_width_, tmp_pixels_opt_b, benchmark_width_, NULL,
|
||||||
dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
|
0, dst_pixels_opt, benchmark_width_ * 4, benchmark_width_,
|
||||||
benchmark_height_);
|
benchmark_height_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3502,8 +3507,8 @@ TEST_F(LibYUVPlanarTest, SplitXRGBPlane_Opt) {
|
|||||||
for (int i = 0; i < benchmark_iterations_; ++i) {
|
for (int i = 0; i < benchmark_iterations_; ++i) {
|
||||||
SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_opt_r,
|
SplitARGBPlane(src_pixels, benchmark_width_ * 4, tmp_pixels_opt_r,
|
||||||
benchmark_width_, tmp_pixels_opt_g, benchmark_width_,
|
benchmark_width_, tmp_pixels_opt_g, benchmark_width_,
|
||||||
tmp_pixels_opt_b, benchmark_width_, NULL, 0, benchmark_width_,
|
tmp_pixels_opt_b, benchmark_width_, NULL, 0,
|
||||||
benchmark_height_);
|
benchmark_width_, benchmark_height_);
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g,
|
MergeARGBPlane(tmp_pixels_opt_r, benchmark_width_, tmp_pixels_opt_g,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user