mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Upstream minor changes. Faster tests, Faster YUV Rotate180 and Mirror
Bug: libyuv:840, libyuv:849: b/144318948
Change-Id: I303c02ac2b838a09d3e623df7a69ffc085fe3cd2
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1914781
Reviewed-by: Miguel Casas <mcasas@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
6afd9becdf
commit
d82f4baf5f
4
BUILD.gn
4
BUILD.gn
@ -73,7 +73,7 @@ group("libyuv") {
|
|||||||
deps += [ ":libyuv_mmi" ]
|
deps += [ ":libyuv_mmi" ]
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_ios) {
|
if (!is_ios && !libyuv_disable_jpeg) {
|
||||||
# Make sure that clients of libyuv link with libjpeg. This can't go in
|
# Make sure that clients of libyuv link with libjpeg. This can't go in
|
||||||
# libyuv_internal because in Windows x64 builds that will generate a clang
|
# libyuv_internal because in Windows x64 builds that will generate a clang
|
||||||
# build of libjpeg, and we don't want two copies.
|
# build of libjpeg, and we don't want two copies.
|
||||||
@ -150,7 +150,7 @@ static_library("libyuv_internal") {
|
|||||||
configs += [ "//build/config/gcc:symbol_visibility_default" ]
|
configs += [ "//build/config/gcc:symbol_visibility_default" ]
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_ios) {
|
if (!is_ios && !libyuv_disable_jpeg) {
|
||||||
defines += [ "HAVE_JPEG" ]
|
defines += [ "HAVE_JPEG" ]
|
||||||
|
|
||||||
# Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
|
# Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
|
||||||
|
|||||||
@ -734,7 +734,7 @@ void MirrorPlane(const uint8_t* src_y,
|
|||||||
#if defined(HAS_MIRRORROW_NEON)
|
#if defined(HAS_MIRRORROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
MirrorRow = MirrorRow_Any_NEON;
|
MirrorRow = MirrorRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 32)) {
|
||||||
MirrorRow = MirrorRow_NEON;
|
MirrorRow = MirrorRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -142,7 +142,7 @@ void RotatePlane180(const uint8_t* src,
|
|||||||
#if defined(HAS_MIRRORROW_NEON)
|
#if defined(HAS_MIRRORROW_NEON)
|
||||||
if (TestCpuFlag(kCpuHasNEON)) {
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
MirrorRow = MirrorRow_Any_NEON;
|
MirrorRow = MirrorRow_Any_NEON;
|
||||||
if (IS_ALIGNED(width, 16)) {
|
if (IS_ALIGNED(width, 32)) {
|
||||||
MirrorRow = MirrorRow_NEON;
|
MirrorRow = MirrorRow_NEON;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -207,11 +207,11 @@ void RotatePlane180(const uint8_t* src,
|
|||||||
|
|
||||||
// Odd height will harmlessly mirror the middle row twice.
|
// Odd height will harmlessly mirror the middle row twice.
|
||||||
for (y = 0; y < half_height; ++y) {
|
for (y = 0; y < half_height; ++y) {
|
||||||
MirrorRow(src, row, width); // Mirror first row into a buffer
|
CopyRow(src, row, width); // Copy first row into buffer
|
||||||
src += src_stride;
|
|
||||||
MirrorRow(src_bot, dst, width); // Mirror last row into first row
|
MirrorRow(src_bot, dst, width); // Mirror last row into first row
|
||||||
|
MirrorRow(row, dst_bot, width); // Mirror buffer into last row
|
||||||
|
src += src_stride;
|
||||||
dst += dst_stride;
|
dst += dst_stride;
|
||||||
CopyRow(row, dst_bot, width); // Copy first mirrored row into last
|
|
||||||
src_bot -= src_stride;
|
src_bot -= src_stride;
|
||||||
dst_bot -= dst_stride;
|
dst_bot -= dst_stride;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1156,7 +1156,7 @@ ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
|
|||||||
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
|
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_MIRRORROW_NEON
|
#ifdef HAS_MIRRORROW_NEON
|
||||||
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
|
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
|
||||||
#endif
|
#endif
|
||||||
#ifdef HAS_MIRRORROW_MSA
|
#ifdef HAS_MIRRORROW_MSA
|
||||||
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
|
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
|
||||||
|
|||||||
@ -84,7 +84,7 @@ static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
|||||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
|
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
|
||||||
|
|
||||||
static const uvec16 kSub128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u,
|
static const uvec16 kSub128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u,
|
||||||
0x8080u, 0x8080u, 0x8080u, 0x8080u};
|
0x8080u, 0x8080u, 0x8080u, 0x8080u};
|
||||||
|
|
||||||
#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
|
#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
|
||||||
|
|
||||||
@ -1101,10 +1101,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
"lea 0x40(%0),%0 \n" \
|
"lea 0x40(%0),%0 \n" \
|
||||||
"phaddw %%xmm0,%%xmm6 \n" \
|
"phaddw %%xmm0,%%xmm6 \n" \
|
||||||
"phaddw %%xmm2,%%xmm1 \n" \
|
"phaddw %%xmm2,%%xmm1 \n" \
|
||||||
"paddw %%" #round \
|
"paddw %%" #round ",%%xmm6 \n" \
|
||||||
",%%xmm6 \n" \
|
"paddw %%" #round ",%%xmm1 \n" \
|
||||||
"paddw %%" #round \
|
|
||||||
",%%xmm1 \n" \
|
|
||||||
"psrlw $0x8,%%xmm6 \n" \
|
"psrlw $0x8,%%xmm6 \n" \
|
||||||
"psrlw $0x8,%%xmm1 \n" \
|
"psrlw $0x8,%%xmm1 \n" \
|
||||||
"packuswb %%xmm1,%%xmm6 \n" \
|
"packuswb %%xmm1,%%xmm6 \n" \
|
||||||
@ -1113,35 +1111,33 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
|||||||
"sub $0x10,%2 \n" \
|
"sub $0x10,%2 \n" \
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
|
|
||||||
#define RGBTOY_AVX2(round) \
|
#define RGBTOY_AVX2(round) \
|
||||||
"1: \n" \
|
"1: \n" \
|
||||||
"vmovdqu (%0),%%ymm0 \n" \
|
"vmovdqu (%0),%%ymm0 \n" \
|
||||||
"vmovdqu 0x20(%0),%%ymm1 \n" \
|
"vmovdqu 0x20(%0),%%ymm1 \n" \
|
||||||
"vmovdqu 0x40(%0),%%ymm2 \n" \
|
"vmovdqu 0x40(%0),%%ymm2 \n" \
|
||||||
"vmovdqu 0x60(%0),%%ymm3 \n" \
|
"vmovdqu 0x60(%0),%%ymm3 \n" \
|
||||||
"vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \
|
"vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \
|
||||||
"vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \
|
"vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \
|
||||||
"vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \
|
"vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \
|
||||||
"vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \
|
"vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \
|
||||||
"vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \
|
"vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \
|
||||||
"vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \
|
"vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \
|
||||||
"vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \
|
"vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \
|
||||||
"vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \
|
"vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \
|
||||||
"lea 0x80(%0),%0 \n" \
|
"lea 0x80(%0),%0 \n" \
|
||||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
||||||
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \
|
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \
|
||||||
"vpaddw %%" #round \
|
"vpaddw %%" #round ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
|
||||||
",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
|
"vpaddw %%" #round ",%%ymm2,%%ymm2 \n" \
|
||||||
"vpaddw %%" #round \
|
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
|
||||||
",%%ymm2,%%ymm2 \n" \
|
"vpsrlw $0x8,%%ymm2,%%ymm2 \n" \
|
||||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
|
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
||||||
"vpsrlw $0x8,%%ymm2,%%ymm2 \n" \
|
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. */ \
|
||||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
"vmovdqu %%ymm0,(%1) \n" \
|
||||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. */ \
|
"lea 0x20(%1),%1 \n" \
|
||||||
"vmovdqu %%ymm0,(%1) \n" \
|
"sub $0x20,%2 \n" \
|
||||||
"lea 0x20(%1),%1 \n" \
|
"jg 1b \n" \
|
||||||
"sub $0x20,%2 \n" \
|
|
||||||
"jg 1b \n" \
|
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
|
|
||||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||||
@ -1152,15 +1148,15 @@ void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
"movdqa %5,%%xmm7 \n"
|
"movdqa %5,%%xmm7 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY(xmm7)
|
LABELALIGN
|
||||||
|
RGBTOY(xmm7)
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kARGBToY), // %3
|
: "m"(kARGBToY), // %3
|
||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kAddY16) // %5
|
"m"(kAddY16) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOYROW_SSSE3
|
#endif // HAS_ARGBTOYROW_SSSE3
|
||||||
|
|
||||||
@ -1172,7 +1168,8 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"movdqa %3,%%xmm4 \n"
|
"movdqa %3,%%xmm4 \n"
|
||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY(xmm5)
|
LABELALIGN
|
||||||
|
RGBTOY(xmm5)
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -1190,7 +1187,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
|||||||
"movdqa %3,%%xmm4 \n"
|
"movdqa %3,%%xmm4 \n"
|
||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY(xmm5)
|
LABELALIGN
|
||||||
|
RGBTOY(xmm5)
|
||||||
: "+r"(src_rgba), // %0
|
: "+r"(src_rgba), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -1212,7 +1210,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"vbroadcastf128 %5,%%ymm7 \n"
|
"vbroadcastf128 %5,%%ymm7 \n"
|
||||||
"vmovdqu %6,%%ymm6 \n"
|
"vmovdqu %6,%%ymm6 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY_AVX2(ymm7)
|
LABELALIGN
|
||||||
|
RGBTOY_AVX2(ymm7)
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -1220,8 +1219,7 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kAddY16), // %5
|
"m"(kAddY16), // %5
|
||||||
"m"(kPermdARGBToY_AVX) // %6
|
"m"(kPermdARGBToY_AVX) // %6
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOYROW_AVX2
|
#endif // HAS_ARGBTOYROW_AVX2
|
||||||
|
|
||||||
@ -1234,7 +1232,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
|||||||
"vbroadcastf128 %5,%%ymm7 \n"
|
"vbroadcastf128 %5,%%ymm7 \n"
|
||||||
"vmovdqu %6,%%ymm6 \n"
|
"vmovdqu %6,%%ymm6 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY_AVX2(ymm7)
|
LABELALIGN
|
||||||
|
RGBTOY_AVX2(ymm7)
|
||||||
: "+r"(src_abgr), // %0
|
: "+r"(src_abgr), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -1242,8 +1241,7 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
|||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kAddY16), // %5
|
"m"(kAddY16), // %5
|
||||||
"m"(kPermdARGBToY_AVX) // %6
|
"m"(kPermdARGBToY_AVX) // %6
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
#endif // HAS_ABGRTOYROW_AVX2
|
#endif // HAS_ABGRTOYROW_AVX2
|
||||||
|
|
||||||
@ -1255,15 +1253,15 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
|||||||
"vbroadcastf128 %4,%%ymm5 \n"
|
"vbroadcastf128 %4,%%ymm5 \n"
|
||||||
"vmovdqu %5,%%ymm6 \n"
|
"vmovdqu %5,%%ymm6 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY_AVX2(ymm5)
|
LABELALIGN
|
||||||
|
RGBTOY_AVX2(ymm5)
|
||||||
: "+r"(src_argb), // %0
|
: "+r"(src_argb), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kARGBToYJ), // %3
|
: "m"(kARGBToYJ), // %3
|
||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kPermdARGBToY_AVX) // %5
|
"m"(kPermdARGBToY_AVX) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOYJROW_AVX2
|
#endif // HAS_ARGBTOYJROW_AVX2
|
||||||
|
|
||||||
@ -1275,8 +1273,9 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
|||||||
"vbroadcastf128 %4,%%ymm5 \n"
|
"vbroadcastf128 %4,%%ymm5 \n"
|
||||||
"vmovdqu %5,%%ymm6 \n"
|
"vmovdqu %5,%%ymm6 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY_AVX2(
|
LABELALIGN
|
||||||
ymm5) "vzeroupper \n"
|
RGBTOY_AVX2(ymm5)
|
||||||
|
"vzeroupper \n"
|
||||||
: "+r"(src_rgba), // %0
|
: "+r"(src_rgba), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
@ -1537,7 +1536,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
|||||||
"+r"(dst_v), // %2
|
"+r"(dst_v), // %2
|
||||||
"+rm"(width) // %3
|
"+rm"(width) // %3
|
||||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||||
"m"(kSub128), // %5
|
"m"(kSub128), // %5
|
||||||
"m"(kARGBToVJ), // %6
|
"m"(kARGBToVJ), // %6
|
||||||
"m"(kARGBToUJ), // %7
|
"m"(kARGBToUJ), // %7
|
||||||
"m"(kShufARGBToUV_AVX) // %8
|
"m"(kShufARGBToUV_AVX) // %8
|
||||||
@ -1607,7 +1606,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
|
|||||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||||
"m"(kARGBToVJ), // %5
|
"m"(kARGBToVJ), // %5
|
||||||
"m"(kARGBToUJ), // %6
|
"m"(kARGBToUJ), // %6
|
||||||
"m"(kSub128) // %7
|
"m"(kSub128) // %7
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
|
||||||
}
|
}
|
||||||
#endif // HAS_ARGBTOUVJROW_SSSE3
|
#endif // HAS_ARGBTOUVJROW_SSSE3
|
||||||
@ -1676,15 +1675,15 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
|||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
"movdqa %5,%%xmm7 \n"
|
"movdqa %5,%%xmm7 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY(xmm7)
|
LABELALIGN
|
||||||
|
RGBTOY(xmm7)
|
||||||
: "+r"(src_bgra), // %0
|
: "+r"(src_bgra), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kBGRAToY), // %3
|
: "m"(kBGRAToY), // %3
|
||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kAddY16) // %5
|
"m"(kAddY16) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
|
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
|
||||||
@ -1756,15 +1755,15 @@ void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
|||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
"movdqa %5,%%xmm7 \n"
|
"movdqa %5,%%xmm7 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY(xmm7)
|
LABELALIGN
|
||||||
|
RGBTOY(xmm7)
|
||||||
: "+r"(src_abgr), // %0
|
: "+r"(src_abgr), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kABGRToY), // %3
|
: "m"(kABGRToY), // %3
|
||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kAddY16) // %5
|
"m"(kAddY16) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||||
@ -1773,15 +1772,15 @@ void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
|||||||
"movdqa %4,%%xmm5 \n"
|
"movdqa %4,%%xmm5 \n"
|
||||||
"movdqa %5,%%xmm7 \n"
|
"movdqa %5,%%xmm7 \n"
|
||||||
|
|
||||||
LABELALIGN RGBTOY(xmm7)
|
LABELALIGN
|
||||||
|
RGBTOY(xmm7)
|
||||||
: "+r"(src_rgba), // %0
|
: "+r"(src_rgba), // %0
|
||||||
"+r"(dst_y), // %1
|
"+r"(dst_y), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "m"(kRGBAToY), // %3
|
: "m"(kRGBAToY), // %3
|
||||||
"m"(kSub128), // %4
|
"m"(kSub128), // %4
|
||||||
"m"(kAddY16) // %5
|
"m"(kAddY16) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||||
"xmm7");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
|
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
|
||||||
|
|||||||
2665
source/row_mmi.cc
2665
source/row_mmi.cc
File diff suppressed because it is too large
Load Diff
@ -682,22 +682,23 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
|
|||||||
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
// Start at end of source row.
|
// Start at end of source row.
|
||||||
"mov r3, #-16 \n"
|
|
||||||
"add %0, %0, %2 \n"
|
"add %0, %0, %2 \n"
|
||||||
"sub %0, #16 \n"
|
"sub %0, %0, #32 \n" // 32 bytes per loop
|
||||||
|
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
|
"vld1.8 {q1, q2}, [%0], %3 \n" // src -= 32
|
||||||
"subs %2, #16 \n" // 16 pixels per loop.
|
"subs %2, #32 \n" // 32 pixels per loop.
|
||||||
"vrev64.8 q0, q0 \n"
|
"vrev64.8 q0, q2 \n"
|
||||||
"vst1.8 {d1}, [%1]! \n" // dst += 16
|
"vrev64.8 q1, q1 \n"
|
||||||
"vst1.8 {d0}, [%1]! \n"
|
"vswp d0, d1 \n"
|
||||||
|
"vswp d2, d3 \n"
|
||||||
|
"vst1.8 {q0, q1}, [%1]! \n" // dst += 32
|
||||||
"bgt 1b \n"
|
"bgt 1b \n"
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
:
|
: "r"(-32) // %3
|
||||||
: "cc", "memory", "r3", "q0");
|
: "cc", "memory", "q0", "q1", "q2");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MirrorUVRow_NEON(const uint8_t* src_uv,
|
void MirrorUVRow_NEON(const uint8_t* src_uv,
|
||||||
|
|||||||
@ -723,23 +723,29 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
|
|||||||
: "cc", "memory", "v0");
|
: "cc", "memory", "v0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shuffle table for reversing the bytes.
|
||||||
|
static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
|
||||||
|
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
|
||||||
|
|
||||||
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
||||||
asm volatile(
|
asm volatile(
|
||||||
// Start at end of source row.
|
// Start at end of source row.
|
||||||
|
"ld1 {v3.16b}, [%4] \n" // shuffler
|
||||||
"add %0, %0, %w2, sxtw \n"
|
"add %0, %0, %w2, sxtw \n"
|
||||||
"sub %0, %0, #16 \n"
|
"sub %0, %0, #32 \n"
|
||||||
"1: \n"
|
"1: \n"
|
||||||
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
|
"ld1 {v1.16b,v2.16b}, [%0], %3 \n" // src -= 32
|
||||||
"subs %w2, %w2, #16 \n" // 16 pixels per loop.
|
"subs %w2, %w2, #32 \n" // 32 pixels per loop.
|
||||||
"rev64 v0.16b, v0.16b \n"
|
"tbl v1.16b, {v1.16b}, v3.16b \n"
|
||||||
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
|
"tbl v0.16b, {v2.16b}, v3.16b \n"
|
||||||
"st1 {v0.D}[0], [%1], #8 \n"
|
"st1 {v0.16b, v1.16b}, [%1], #32 \n" // store 32 pixels
|
||||||
"b.gt 1b \n"
|
"b.gt 1b \n"
|
||||||
: "+r"(src), // %0
|
: "+r"(src), // %0
|
||||||
"+r"(dst), // %1
|
"+r"(dst), // %1
|
||||||
"+r"(width) // %2
|
"+r"(width) // %2
|
||||||
: "r"((ptrdiff_t)-16) // %3
|
: "r"((ptrdiff_t)-32), // %3
|
||||||
: "cc", "memory", "v0");
|
"r"(&kShuffleMirror) // %4
|
||||||
|
: "cc", "memory", "v0", "v1", "v2", "v3");
|
||||||
}
|
}
|
||||||
|
|
||||||
void MirrorUVRow_NEON(const uint8_t* src_uv,
|
void MirrorUVRow_NEON(const uint8_t* src_uv,
|
||||||
|
|||||||
@ -40,9 +40,9 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Some functions fail on big endian. Enable these tests on all cpus except
|
// Some functions fail on big endian. Enable these tests on all cpus except
|
||||||
// PowerPC
|
// PowerPC, but they are not optimized so disabled by default.
|
||||||
#if !defined(__powerpc__)
|
#if !defined(__powerpc__) && defined(ENABLE_SLOW_TESTS)
|
||||||
#define LITTLE_ENDIAN_TEST 1
|
#define INTEL_TEST 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace libyuv {
|
namespace libyuv {
|
||||||
@ -691,7 +691,7 @@ TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1)
|
|||||||
TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
|
TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
|
||||||
TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
|
TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
|
||||||
TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
|
TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
|
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
|
||||||
TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
|
TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
|
||||||
TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
|
TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
|
||||||
@ -723,7 +723,7 @@ TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
|
|||||||
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
|
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
|
||||||
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
|
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
|
||||||
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
|
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
|
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
|
||||||
TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
|
TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
|
||||||
#endif
|
#endif
|
||||||
@ -876,7 +876,7 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
|
|||||||
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
|
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
|
||||||
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
|
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
|
||||||
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
|
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
|
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
|
||||||
#endif
|
#endif
|
||||||
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
|
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
|
||||||
@ -1012,7 +1012,7 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
|
|||||||
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
|
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
|
||||||
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
|
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||||
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
|
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
|
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
|
||||||
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
|
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
|
||||||
#endif
|
#endif
|
||||||
@ -1022,7 +1022,7 @@ TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
|
|||||||
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
|
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
|
||||||
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
|
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
|
||||||
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
|
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
|
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
|
||||||
#endif
|
#endif
|
||||||
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
|
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
|
||||||
@ -1200,20 +1200,20 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
|
|||||||
// TODO(fbarchard): make ARM version of C code that matches NEON.
|
// TODO(fbarchard): make ARM version of C code that matches NEON.
|
||||||
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
||||||
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
|
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
|
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
|
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
|
||||||
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||||
@ -1226,7 +1226,7 @@ TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 0)
|
|||||||
TESTATOB(RGBA, 4, 4, 1, J400, 1, 1, 1, 0)
|
TESTATOB(RGBA, 4, 4, 1, J400, 1, 1, 1, 0)
|
||||||
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
|
||||||
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
|
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
|
||||||
@ -1245,7 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
|
|||||||
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
|
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
|
||||||
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
|
||||||
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
|
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||||
@ -1348,7 +1348,7 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
|
|||||||
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||||
HEIGHT_B, DIFF)
|
HEIGHT_B, DIFF)
|
||||||
|
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
|
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2447,7 +2447,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
|
|||||||
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||||
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
|
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
|
||||||
|
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
|
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
|
||||||
#endif
|
#endif
|
||||||
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
|
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
|
||||||
@ -2591,7 +2591,7 @@ TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
|
|||||||
TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
|
TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
|
||||||
TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
|
TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
|
||||||
TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
|
TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
|
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
|
||||||
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
|
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
|
||||||
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
|
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
|
||||||
@ -2738,7 +2738,7 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
|
|||||||
_Opt, +, 0, FMT_C, BPP_C)
|
_Opt, +, 0, FMT_C, BPP_C)
|
||||||
|
|
||||||
// Caveat: Destination needs to be 4 bytes
|
// Caveat: Destination needs to be 4 bytes
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
|
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
|
||||||
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
|
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
|
||||||
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
|
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
|
||||||
@ -2929,7 +2929,7 @@ TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 2)
|
|||||||
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 2)
|
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 2)
|
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 2)
|
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 2)
|
||||||
#ifdef LITTLE_ENDIAN_TEST
|
#ifdef INTEL_TEST
|
||||||
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
|
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
|
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
|
||||||
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
|
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
|
||||||
|
|||||||
@ -306,7 +306,9 @@ TEST_SCALETO(ARGBScale, 320, 240)
|
|||||||
TEST_SCALETO(ARGBScale, 569, 480)
|
TEST_SCALETO(ARGBScale, 569, 480)
|
||||||
TEST_SCALETO(ARGBScale, 640, 360)
|
TEST_SCALETO(ARGBScale, 640, 360)
|
||||||
TEST_SCALETO(ARGBScale, 1280, 720)
|
TEST_SCALETO(ARGBScale, 1280, 720)
|
||||||
|
#ifdef ENABLE_SLOW_TESTS
|
||||||
TEST_SCALETO(ARGBScale, 1920, 1080)
|
TEST_SCALETO(ARGBScale, 1920, 1080)
|
||||||
|
#endif // ENABLE_SLOW_TESTS
|
||||||
#undef TEST_SCALETO1
|
#undef TEST_SCALETO1
|
||||||
#undef TEST_SCALETO
|
#undef TEST_SCALETO
|
||||||
|
|
||||||
|
|||||||
@ -500,7 +500,7 @@ static int I444TestFilter_16(int src_width,
|
|||||||
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
|
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
|
||||||
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
||||||
|
|
||||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
|
||||||
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
|
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
|
||||||
int diff = I420TestFilter( \
|
int diff = I420TestFilter( \
|
||||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||||
@ -517,7 +517,7 @@ static int I444TestFilter_16(int src_width,
|
|||||||
benchmark_cpu_info_); \
|
benchmark_cpu_info_); \
|
||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
|
TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_16) { \
|
||||||
int diff = I420TestFilter_16( \
|
int diff = I420TestFilter_16( \
|
||||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||||
@ -525,7 +525,7 @@ static int I444TestFilter_16(int src_width,
|
|||||||
benchmark_cpu_info_); \
|
benchmark_cpu_info_); \
|
||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
|
TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_16) { \
|
||||||
int diff = I444TestFilter_16( \
|
int diff = I444TestFilter_16( \
|
||||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||||
@ -536,11 +536,19 @@ static int I444TestFilter_16(int src_width,
|
|||||||
|
|
||||||
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
||||||
// filtering is different fixed point implementations for SSSE3, Neon and C.
|
// filtering is different fixed point implementations for SSSE3, Neon and C.
|
||||||
|
#ifdef ENABLE_SLOW_TESTS
|
||||||
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
||||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
TEST_FACTOR1(, name, None, nom, denom, 0) \
|
||||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
TEST_FACTOR1(, name, Linear, nom, denom, 3) \
|
||||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
|
||||||
TEST_FACTOR1(name, Box, nom, denom, boxdiff)
|
TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
|
||||||
|
#else
|
||||||
|
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
||||||
|
TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
|
||||||
|
TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
|
||||||
|
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
|
||||||
|
TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
|
||||||
|
#endif
|
||||||
|
|
||||||
TEST_FACTOR(2, 1, 2, 0)
|
TEST_FACTOR(2, 1, 2, 0)
|
||||||
TEST_FACTOR(4, 1, 4, 0)
|
TEST_FACTOR(4, 1, 4, 0)
|
||||||
@ -553,7 +561,7 @@ TEST_FACTOR(3, 1, 3, 0)
|
|||||||
#undef SX
|
#undef SX
|
||||||
#undef DX
|
#undef DX
|
||||||
|
|
||||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
|
||||||
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
|
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
|
||||||
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
|
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
|
||||||
height, kFilter##filter, benchmark_iterations_, \
|
height, kFilter##filter, benchmark_iterations_, \
|
||||||
@ -566,13 +574,13 @@ TEST_FACTOR(3, 1, 3, 0)
|
|||||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
|
TEST_F(LibYUVScaleTest, DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
|
||||||
int diff = I420TestFilter_16( \
|
int diff = I420TestFilter_16( \
|
||||||
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
||||||
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
|
TEST_F(LibYUVScaleTest, DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
|
||||||
int diff = I444TestFilter_16( \
|
int diff = I444TestFilter_16( \
|
||||||
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
||||||
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
||||||
@ -593,7 +601,7 @@ TEST_FACTOR(3, 1, 3, 0)
|
|||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, \
|
TEST_F(LibYUVScaleTest, \
|
||||||
I420##name##From##width##x##height##_##filter##_16) { \
|
DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
|
||||||
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
|
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||||
Abs(benchmark_height_), kFilter##filter, \
|
Abs(benchmark_height_), kFilter##filter, \
|
||||||
benchmark_iterations_, disable_cpu_flags_, \
|
benchmark_iterations_, disable_cpu_flags_, \
|
||||||
@ -601,7 +609,7 @@ TEST_FACTOR(3, 1, 3, 0)
|
|||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, \
|
TEST_F(LibYUVScaleTest, \
|
||||||
I444##name##From##width##x##height##_##filter##_16) { \
|
DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
|
||||||
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
|
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||||
Abs(benchmark_height_), kFilter##filter, \
|
Abs(benchmark_height_), kFilter##filter, \
|
||||||
benchmark_iterations_, disable_cpu_flags_, \
|
benchmark_iterations_, disable_cpu_flags_, \
|
||||||
@ -609,19 +617,30 @@ TEST_FACTOR(3, 1, 3, 0)
|
|||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ENABLE_SLOW_TESTS
|
||||||
// Test scale to a specified size with all 4 filters.
|
// Test scale to a specified size with all 4 filters.
|
||||||
#define TEST_SCALETO(name, width, height) \
|
#define TEST_SCALETO(name, width, height) \
|
||||||
TEST_SCALETO1(name, width, height, None, 0) \
|
TEST_SCALETO1(, name, width, height, None, 0) \
|
||||||
TEST_SCALETO1(name, width, height, Linear, 3) \
|
TEST_SCALETO1(, name, width, height, Linear, 3) \
|
||||||
TEST_SCALETO1(name, width, height, Bilinear, 3) \
|
TEST_SCALETO1(, name, width, height, Bilinear, 3) \
|
||||||
TEST_SCALETO1(name, width, height, Box, 3)
|
TEST_SCALETO1(, name, width, height, Box, 3)
|
||||||
|
#else
|
||||||
|
// Test scale to a specified size with all 4 filters.
|
||||||
|
#define TEST_SCALETO(name, width, height) \
|
||||||
|
TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
|
||||||
|
TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
|
||||||
|
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
|
||||||
|
TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
|
||||||
|
#endif
|
||||||
|
|
||||||
TEST_SCALETO(Scale, 1, 1)
|
TEST_SCALETO(Scale, 1, 1)
|
||||||
TEST_SCALETO(Scale, 320, 240)
|
TEST_SCALETO(Scale, 320, 240)
|
||||||
TEST_SCALETO(Scale, 569, 480)
|
TEST_SCALETO(Scale, 569, 480)
|
||||||
TEST_SCALETO(Scale, 640, 360)
|
TEST_SCALETO(Scale, 640, 360)
|
||||||
TEST_SCALETO(Scale, 1280, 720)
|
TEST_SCALETO(Scale, 1280, 720)
|
||||||
|
#ifdef ENABLE_SLOW_TESTS
|
||||||
TEST_SCALETO(Scale, 1920, 1080)
|
TEST_SCALETO(Scale, 1920, 1080)
|
||||||
|
#endif // ENABLE_SLOW_TESTS
|
||||||
#undef TEST_SCALETO1
|
#undef TEST_SCALETO1
|
||||||
#undef TEST_SCALETO
|
#undef TEST_SCALETO
|
||||||
|
|
||||||
@ -879,7 +898,7 @@ static int TestPlaneFilter_16(int src_width,
|
|||||||
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
||||||
|
|
||||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||||
TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \
|
TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) {\
|
||||||
int diff = TestPlaneFilter_16( \
|
int diff = TestPlaneFilter_16( \
|
||||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user