mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 16:56:55 +08:00
Upstream minor changes. Faster tests, Faster YUV Rotate180 and Mirror
Bug: libyuv:840, libyuv:849, b/144318948
Change-Id: I303c02ac2b838a09d3e623df7a69ffc085fe3cd2
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1914781
Reviewed-by: Miguel Casas <mcasas@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
6afd9becdf
commit
d82f4baf5f
4
BUILD.gn
4
BUILD.gn
@ -73,7 +73,7 @@ group("libyuv") {
|
||||
deps += [ ":libyuv_mmi" ]
|
||||
}
|
||||
|
||||
if (!is_ios) {
|
||||
if (!is_ios && !libyuv_disable_jpeg) {
|
||||
# Make sure that clients of libyuv link with libjpeg. This can't go in
|
||||
# libyuv_internal because in Windows x64 builds that will generate a clang
|
||||
# build of libjpeg, and we don't want two copies.
|
||||
@ -150,7 +150,7 @@ static_library("libyuv_internal") {
|
||||
configs += [ "//build/config/gcc:symbol_visibility_default" ]
|
||||
}
|
||||
|
||||
if (!is_ios) {
|
||||
if (!is_ios && !libyuv_disable_jpeg) {
|
||||
defines += [ "HAVE_JPEG" ]
|
||||
|
||||
# Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
|
||||
|
||||
@ -734,7 +734,7 @@ void MirrorPlane(const uint8_t* src_y,
|
||||
#if defined(HAS_MIRRORROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
MirrorRow = MirrorRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
MirrorRow = MirrorRow_NEON;
|
||||
}
|
||||
}
|
||||
|
||||
@ -142,7 +142,7 @@ void RotatePlane180(const uint8_t* src,
|
||||
#if defined(HAS_MIRRORROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
MirrorRow = MirrorRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
if (IS_ALIGNED(width, 32)) {
|
||||
MirrorRow = MirrorRow_NEON;
|
||||
}
|
||||
}
|
||||
@ -207,11 +207,11 @@ void RotatePlane180(const uint8_t* src,
|
||||
|
||||
// Odd height will harmlessly mirror the middle row twice.
|
||||
for (y = 0; y < half_height; ++y) {
|
||||
MirrorRow(src, row, width); // Mirror first row into a buffer
|
||||
src += src_stride;
|
||||
CopyRow(src, row, width); // Copy first row into buffer
|
||||
MirrorRow(src_bot, dst, width); // Mirror last row into first row
|
||||
MirrorRow(row, dst_bot, width); // Mirror buffer into last row
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
CopyRow(row, dst_bot, width); // Copy first mirrored row into last
|
||||
src_bot -= src_stride;
|
||||
dst_bot -= dst_stride;
|
||||
}
|
||||
|
||||
@ -1156,7 +1156,7 @@ ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31)
|
||||
ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_MIRRORROW_NEON
|
||||
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15)
|
||||
ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31)
|
||||
#endif
|
||||
#ifdef HAS_MIRRORROW_MSA
|
||||
ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63)
|
||||
|
||||
@ -84,7 +84,7 @@ static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
|
||||
128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u};
|
||||
|
||||
static const uvec16 kSub128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u,
|
||||
0x8080u, 0x8080u, 0x8080u, 0x8080u};
|
||||
0x8080u, 0x8080u, 0x8080u, 0x8080u};
|
||||
|
||||
#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3)
|
||||
|
||||
@ -1101,10 +1101,8 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||
"lea 0x40(%0),%0 \n" \
|
||||
"phaddw %%xmm0,%%xmm6 \n" \
|
||||
"phaddw %%xmm2,%%xmm1 \n" \
|
||||
"paddw %%" #round \
|
||||
",%%xmm6 \n" \
|
||||
"paddw %%" #round \
|
||||
",%%xmm1 \n" \
|
||||
"paddw %%" #round ",%%xmm6 \n" \
|
||||
"paddw %%" #round ",%%xmm1 \n" \
|
||||
"psrlw $0x8,%%xmm6 \n" \
|
||||
"psrlw $0x8,%%xmm1 \n" \
|
||||
"packuswb %%xmm1,%%xmm6 \n" \
|
||||
@ -1113,35 +1111,33 @@ void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) {
|
||||
"sub $0x10,%2 \n" \
|
||||
"jg 1b \n"
|
||||
|
||||
#define RGBTOY_AVX2(round) \
|
||||
"1: \n" \
|
||||
"vmovdqu (%0),%%ymm0 \n" \
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n" \
|
||||
"vmovdqu 0x40(%0),%%ymm2 \n" \
|
||||
"vmovdqu 0x60(%0),%%ymm3 \n" \
|
||||
"vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \
|
||||
"vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \
|
||||
"vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \
|
||||
"vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \
|
||||
"vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \
|
||||
"vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \
|
||||
"vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \
|
||||
"vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \
|
||||
"lea 0x80(%0),%0 \n" \
|
||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
||||
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \
|
||||
"vpaddw %%" #round \
|
||||
",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
|
||||
"vpaddw %%" #round \
|
||||
",%%ymm2,%%ymm2 \n" \
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
|
||||
"vpsrlw $0x8,%%ymm2,%%ymm2 \n" \
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. */ \
|
||||
"vmovdqu %%ymm0,(%1) \n" \
|
||||
"lea 0x20(%1),%1 \n" \
|
||||
"sub $0x20,%2 \n" \
|
||||
"jg 1b \n" \
|
||||
#define RGBTOY_AVX2(round) \
|
||||
"1: \n" \
|
||||
"vmovdqu (%0),%%ymm0 \n" \
|
||||
"vmovdqu 0x20(%0),%%ymm1 \n" \
|
||||
"vmovdqu 0x40(%0),%%ymm2 \n" \
|
||||
"vmovdqu 0x60(%0),%%ymm3 \n" \
|
||||
"vpsubb %%ymm5, %%ymm0, %%ymm0 \n" \
|
||||
"vpsubb %%ymm5, %%ymm1, %%ymm1 \n" \
|
||||
"vpsubb %%ymm5, %%ymm2, %%ymm2 \n" \
|
||||
"vpsubb %%ymm5, %%ymm3, %%ymm3 \n" \
|
||||
"vpmaddubsw %%ymm0,%%ymm4,%%ymm0 \n" \
|
||||
"vpmaddubsw %%ymm1,%%ymm4,%%ymm1 \n" \
|
||||
"vpmaddubsw %%ymm2,%%ymm4,%%ymm2 \n" \
|
||||
"vpmaddubsw %%ymm3,%%ymm4,%%ymm3 \n" \
|
||||
"lea 0x80(%0),%0 \n" \
|
||||
"vphaddw %%ymm1,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
||||
"vphaddw %%ymm3,%%ymm2,%%ymm2 \n" \
|
||||
"vpaddw %%" #round ",%%ymm0,%%ymm0 \n" /* Add .5 for rounding. */ \
|
||||
"vpaddw %%" #round ",%%ymm2,%%ymm2 \n" \
|
||||
"vpsrlw $0x8,%%ymm0,%%ymm0 \n" \
|
||||
"vpsrlw $0x8,%%ymm2,%%ymm2 \n" \
|
||||
"vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" /* mutates. */ \
|
||||
"vpermd %%ymm0,%%ymm6,%%ymm0 \n" /* unmutate. */ \
|
||||
"vmovdqu %%ymm0,(%1) \n" \
|
||||
"lea 0x20(%1),%1 \n" \
|
||||
"sub $0x20,%2 \n" \
|
||||
"jg 1b \n" \
|
||||
"vzeroupper \n"
|
||||
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
@ -1152,15 +1148,15 @@ void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
"movdqa %5,%%xmm7 \n"
|
||||
|
||||
LABELALIGN RGBTOY(xmm7)
|
||||
LABELALIGN
|
||||
RGBTOY(xmm7)
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kARGBToY), // %3
|
||||
"m"(kSub128), // %4
|
||||
"m"(kAddY16) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
@ -1172,7 +1168,8 @@ void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
|
||||
LABELALIGN RGBTOY(xmm5)
|
||||
LABELALIGN
|
||||
RGBTOY(xmm5)
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
@ -1190,7 +1187,8 @@ void RGBAToYJRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
"movdqa %3,%%xmm4 \n"
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
|
||||
LABELALIGN RGBTOY(xmm5)
|
||||
LABELALIGN
|
||||
RGBTOY(xmm5)
|
||||
: "+r"(src_rgba), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
@ -1212,7 +1210,8 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
"vbroadcastf128 %5,%%ymm7 \n"
|
||||
"vmovdqu %6,%%ymm6 \n"
|
||||
|
||||
LABELALIGN RGBTOY_AVX2(ymm7)
|
||||
LABELALIGN
|
||||
RGBTOY_AVX2(ymm7)
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
@ -1220,8 +1219,7 @@ void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
"m"(kSub128), // %4
|
||||
"m"(kAddY16), // %5
|
||||
"m"(kPermdARGBToY_AVX) // %6
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOYROW_AVX2
|
||||
|
||||
@ -1234,7 +1232,8 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
||||
"vbroadcastf128 %5,%%ymm7 \n"
|
||||
"vmovdqu %6,%%ymm6 \n"
|
||||
|
||||
LABELALIGN RGBTOY_AVX2(ymm7)
|
||||
LABELALIGN
|
||||
RGBTOY_AVX2(ymm7)
|
||||
: "+r"(src_abgr), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
@ -1242,8 +1241,7 @@ void ABGRToYRow_AVX2(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
||||
"m"(kSub128), // %4
|
||||
"m"(kAddY16), // %5
|
||||
"m"(kPermdARGBToY_AVX) // %6
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ABGRTOYROW_AVX2
|
||||
|
||||
@ -1255,15 +1253,15 @@ void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) {
|
||||
"vbroadcastf128 %4,%%ymm5 \n"
|
||||
"vmovdqu %5,%%ymm6 \n"
|
||||
|
||||
LABELALIGN RGBTOY_AVX2(ymm5)
|
||||
LABELALIGN
|
||||
RGBTOY_AVX2(ymm5)
|
||||
: "+r"(src_argb), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kARGBToYJ), // %3
|
||||
"m"(kSub128), // %4
|
||||
"m"(kPermdARGBToY_AVX) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOYJROW_AVX2
|
||||
|
||||
@ -1275,8 +1273,9 @@ void RGBAToYJRow_AVX2(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
"vbroadcastf128 %4,%%ymm5 \n"
|
||||
"vmovdqu %5,%%ymm6 \n"
|
||||
|
||||
LABELALIGN RGBTOY_AVX2(
|
||||
ymm5) "vzeroupper \n"
|
||||
LABELALIGN
|
||||
RGBTOY_AVX2(ymm5)
|
||||
"vzeroupper \n"
|
||||
: "+r"(src_rgba), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
@ -1537,7 +1536,7 @@ void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,
|
||||
"+r"(dst_v), // %2
|
||||
"+rm"(width) // %3
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"m"(kSub128), // %5
|
||||
"m"(kSub128), // %5
|
||||
"m"(kARGBToVJ), // %6
|
||||
"m"(kARGBToUJ), // %7
|
||||
"m"(kShufARGBToUV_AVX) // %8
|
||||
@ -1607,7 +1606,7 @@ void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,
|
||||
: "r"((intptr_t)(src_stride_argb)), // %4
|
||||
"m"(kARGBToVJ), // %5
|
||||
"m"(kARGBToUJ), // %6
|
||||
"m"(kSub128) // %7
|
||||
"m"(kSub128) // %7
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7");
|
||||
}
|
||||
#endif // HAS_ARGBTOUVJROW_SSSE3
|
||||
@ -1676,15 +1675,15 @@ void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
"movdqa %5,%%xmm7 \n"
|
||||
|
||||
LABELALIGN RGBTOY(xmm7)
|
||||
LABELALIGN
|
||||
RGBTOY(xmm7)
|
||||
: "+r"(src_bgra), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kBGRAToY), // %3
|
||||
"m"(kSub128), // %4
|
||||
"m"(kAddY16) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
|
||||
void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,
|
||||
@ -1756,15 +1755,15 @@ void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
"movdqa %5,%%xmm7 \n"
|
||||
|
||||
LABELALIGN RGBTOY(xmm7)
|
||||
LABELALIGN
|
||||
RGBTOY(xmm7)
|
||||
: "+r"(src_abgr), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kABGRToY), // %3
|
||||
"m"(kSub128), // %4
|
||||
"m"(kAddY16) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
|
||||
void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
@ -1773,15 +1772,15 @@ void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
|
||||
"movdqa %4,%%xmm5 \n"
|
||||
"movdqa %5,%%xmm7 \n"
|
||||
|
||||
LABELALIGN RGBTOY(xmm7)
|
||||
LABELALIGN
|
||||
RGBTOY(xmm7)
|
||||
: "+r"(src_rgba), // %0
|
||||
"+r"(dst_y), // %1
|
||||
"+r"(width) // %2
|
||||
: "m"(kRGBAToY), // %3
|
||||
"m"(kSub128), // %4
|
||||
"m"(kAddY16) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7");
|
||||
}
|
||||
|
||||
void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,
|
||||
|
||||
2665
source/row_mmi.cc
2665
source/row_mmi.cc
File diff suppressed because it is too large
Load Diff
@ -682,22 +682,23 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
|
||||
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
||||
asm volatile(
|
||||
// Start at end of source row.
|
||||
"mov r3, #-16 \n"
|
||||
"add %0, %0, %2 \n"
|
||||
"sub %0, #16 \n"
|
||||
"sub %0, %0, #32 \n" // 32 bytes per loop
|
||||
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0], r3 \n" // src -= 16
|
||||
"subs %2, #16 \n" // 16 pixels per loop.
|
||||
"vrev64.8 q0, q0 \n"
|
||||
"vst1.8 {d1}, [%1]! \n" // dst += 16
|
||||
"vst1.8 {d0}, [%1]! \n"
|
||||
"vld1.8 {q1, q2}, [%0], %3 \n" // src -= 32
|
||||
"subs %2, #32 \n" // 32 pixels per loop.
|
||||
"vrev64.8 q0, q2 \n"
|
||||
"vrev64.8 q1, q1 \n"
|
||||
"vswp d0, d1 \n"
|
||||
"vswp d2, d3 \n"
|
||||
"vst1.8 {q0, q1}, [%1]! \n" // dst += 32
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
:
|
||||
: "cc", "memory", "r3", "q0");
|
||||
: "r"(-32) // %3
|
||||
: "cc", "memory", "q0", "q1", "q2");
|
||||
}
|
||||
|
||||
void MirrorUVRow_NEON(const uint8_t* src_uv,
|
||||
|
||||
@ -723,23 +723,29 @@ void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) {
|
||||
: "cc", "memory", "v0");
|
||||
}
|
||||
|
||||
// Shuffle table for reversing the bytes.
|
||||
static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
|
||||
7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u};
|
||||
|
||||
void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) {
|
||||
asm volatile(
|
||||
// Start at end of source row.
|
||||
"ld1 {v3.16b}, [%4] \n" // shuffler
|
||||
"add %0, %0, %w2, sxtw \n"
|
||||
"sub %0, %0, #16 \n"
|
||||
"sub %0, %0, #32 \n"
|
||||
"1: \n"
|
||||
"ld1 {v0.16b}, [%0], %3 \n" // src -= 16
|
||||
"subs %w2, %w2, #16 \n" // 16 pixels per loop.
|
||||
"rev64 v0.16b, v0.16b \n"
|
||||
"st1 {v0.D}[1], [%1], #8 \n" // dst += 16
|
||||
"st1 {v0.D}[0], [%1], #8 \n"
|
||||
"ld1 {v1.16b,v2.16b}, [%0], %3 \n" // src -= 32
|
||||
"subs %w2, %w2, #32 \n" // 32 pixels per loop.
|
||||
"tbl v1.16b, {v1.16b}, v3.16b \n"
|
||||
"tbl v0.16b, {v2.16b}, v3.16b \n"
|
||||
"st1 {v0.16b, v1.16b}, [%1], #32 \n" // store 32 pixels
|
||||
"b.gt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((ptrdiff_t)-16) // %3
|
||||
: "cc", "memory", "v0");
|
||||
: "r"((ptrdiff_t)-32), // %3
|
||||
"r"(&kShuffleMirror) // %4
|
||||
: "cc", "memory", "v0", "v1", "v2", "v3");
|
||||
}
|
||||
|
||||
void MirrorUVRow_NEON(const uint8_t* src_uv,
|
||||
|
||||
@ -40,9 +40,9 @@
|
||||
#endif
|
||||
|
||||
// Some functions fail on big endian. Enable these tests on all cpus except
|
||||
// PowerPC
|
||||
#if !defined(__powerpc__)
|
||||
#define LITTLE_ENDIAN_TEST 1
|
||||
// PowerPC. They are not optimized, so they are disabled by default.
|
||||
#if !defined(__powerpc__) && defined(ENABLE_SLOW_TESTS)
|
||||
#define INTEL_TEST 1
|
||||
#endif
|
||||
|
||||
namespace libyuv {
|
||||
@ -691,7 +691,7 @@ TESTPLANARTOB(J420, 2, 2, RAW, 3, 3, 1)
|
||||
TESTPLANARTOB(J420, 2, 2, RGB24, 3, 3, 1)
|
||||
TESTPLANARTOB(H420, 2, 2, RAW, 3, 3, 1)
|
||||
TESTPLANARTOB(H420, 2, 2, RGB24, 3, 3, 1)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTPLANARTOB(I420, 2, 2, RGB565, 2, 2, 1)
|
||||
TESTPLANARTOB(J420, 2, 2, RGB565, 2, 2, 1)
|
||||
TESTPLANARTOB(H420, 2, 2, RGB565, 2, 2, 1)
|
||||
@ -723,7 +723,7 @@ TESTPLANARTOB(I422, 2, 1, YUY2, 2, 4, 1)
|
||||
TESTPLANARTOB(I422, 2, 1, UYVY, 2, 4, 1)
|
||||
TESTPLANARTOB(I420, 2, 2, I400, 1, 1, 1)
|
||||
TESTPLANARTOB(J420, 2, 2, J400, 1, 1, 1)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTPLANARTOB(I420, 2, 2, AR30, 4, 4, 1)
|
||||
TESTPLANARTOB(H420, 2, 2, AR30, 4, 4, 1)
|
||||
#endif
|
||||
@ -876,7 +876,7 @@ TESTBIPLANARTOB(NV12, 2, 2, RGB24, RGB24, 3, 2)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, RGB24, RGB24, 3, 2)
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RAW, RAW, 3, 2)
|
||||
TESTBIPLANARTOB(NV21, 2, 2, RAW, RAW, 3, 2)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTBIPLANARTOB(NV12, 2, 2, RGB565, RGB565, 2, 9)
|
||||
#endif
|
||||
TESTBIPLANARTOB(NV21, 2, 2, YUV24, RAW, 3, 2)
|
||||
@ -1012,7 +1012,7 @@ TESTATOPLANAR(ARGB, 4, 1, I422, 2, 1, 2)
|
||||
TESTATOPLANAR(ARGB, 4, 1, I444, 1, 1, 2)
|
||||
TESTATOPLANAR(ARGB, 4, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||
TESTATOPLANAR(ARGB, 4, 1, J422, 2, 1, ARM_YUV_ERROR)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOPLANAR(ARGB1555, 2, 1, I420, 2, 2, 15)
|
||||
TESTATOPLANAR(ARGB4444, 2, 1, I420, 2, 2, 17)
|
||||
#endif
|
||||
@ -1022,7 +1022,7 @@ TESTATOPLANAR(J400, 1, 1, J420, 2, 2, 2)
|
||||
TESTATOPLANAR(RAW, 3, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, 1, I420, 2, 2, 4)
|
||||
TESTATOPLANAR(RGB24, 3, 1, J420, 2, 2, ARM_YUV_ERROR)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOPLANAR(RGB565, 2, 1, I420, 2, 2, 5)
|
||||
#endif
|
||||
TESTATOPLANAR(RGBA, 4, 1, I420, 2, 2, 4)
|
||||
@ -1200,20 +1200,20 @@ TESTATOBIPLANAR(AYUV, 1, 4, NV21, 2, 2)
|
||||
// TODO(fbarchard): make ARM version of C code that matches NEON.
|
||||
TESTATOB(AB30, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
||||
TESTATOB(AB30, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOB(ABGR, 4, 4, 1, AR30, 4, 4, 1, 0)
|
||||
#endif
|
||||
TESTATOB(ABGR, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOB(AR30, 4, 4, 1, AB30, 4, 4, 1, 0)
|
||||
#endif
|
||||
TESTATOB(AR30, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOB(AR30, 4, 4, 1, AR30, 4, 4, 1, 0)
|
||||
TESTATOB(AR30, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||
#endif
|
||||
TESTATOB(ARGB, 4, 4, 1, ABGR, 4, 4, 1, 0)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOB(ARGB, 4, 4, 1, AR30, 4, 4, 1, 0)
|
||||
#endif
|
||||
TESTATOB(ARGB, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||
@ -1226,7 +1226,7 @@ TESTATOB(ARGB, 4, 4, 1, J400, 1, 1, 1, 0)
|
||||
TESTATOB(RGBA, 4, 4, 1, J400, 1, 1, 1, 0)
|
||||
TESTATOB(ARGB, 4, 4, 1, RAW, 3, 3, 1, 0)
|
||||
TESTATOB(ARGB, 4, 4, 1, RGB24, 3, 3, 1, 0)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOB(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
|
||||
#endif
|
||||
TESTATOB(ARGB, 4, 4, 1, RGBA, 4, 4, 1, 0)
|
||||
@ -1245,7 +1245,7 @@ TESTATOB(RAW, 3, 3, 1, RGBA, 4, 4, 1, 0)
|
||||
TESTATOB(RAW, 3, 3, 1, RGB24, 3, 3, 1, 0)
|
||||
TESTATOB(RGB24, 3, 3, 1, ARGB, 4, 4, 1, 0)
|
||||
TESTATOB(RGB24, 3, 3, 1, J400, 1, 1, 1, 0)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOB(RGB565, 2, 2, 1, ARGB, 4, 4, 1, 0)
|
||||
#endif
|
||||
TESTATOB(RGBA, 4, 4, 1, ARGB, 4, 4, 1, 0)
|
||||
@ -1348,7 +1348,7 @@ TESTATOB(YUY2, 2, 4, 1, Y, 1, 1, 1, 0)
|
||||
TESTATOBDRANDOM(FMT_A, BPP_A, STRIDE_A, HEIGHT_A, FMT_B, BPP_B, STRIDE_B, \
|
||||
HEIGHT_B, DIFF)
|
||||
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTATOBD(ARGB, 4, 4, 1, RGB565, 2, 2, 1, 0)
|
||||
#endif
|
||||
|
||||
@ -2447,7 +2447,7 @@ TEST_F(LibYUVConvertTest, TestDither) {
|
||||
TESTPLANARTOBID(FMT_PLANAR, SUBSAMP_X, SUBSAMP_Y, FMT_B, BPP_B, ALIGN, \
|
||||
YALIGN, benchmark_width_, DIFF, _Opt, +, 0, FMT_C, BPP_C)
|
||||
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTPLANARTOBD(I420, 2, 2, RGB565, 2, 2, 1, 9, ARGB, 4)
|
||||
#endif
|
||||
#define TESTPTOB(NAME, UYVYTOI420, UYVYTONV12) \
|
||||
@ -2591,7 +2591,7 @@ TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, RGB24, 3)
|
||||
TESTPLANARTOE(H420, 2, 2, RGB24, 1, 3, RAW, 3)
|
||||
TESTPLANARTOE(H420, 2, 2, ARGB, 1, 4, RAW, 3)
|
||||
TESTPLANARTOE(H420, 2, 2, RAW, 1, 3, ARGB, 4)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, RGB565, 2)
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB1555, 2)
|
||||
TESTPLANARTOE(I420, 2, 2, ARGB, 1, 4, ARGB4444, 2)
|
||||
@ -2738,7 +2738,7 @@ TESTQPLANARTOE(I420Alpha, 2, 2, ABGR, 1, 4, ARGB, 4)
|
||||
_Opt, +, 0, FMT_C, BPP_C)
|
||||
|
||||
// Caveat: Destination needs to be 4 bytes
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTPLANETOE(ARGB, 1, 4, AR30, 1, 4, ARGB, 4)
|
||||
TESTPLANETOE(ABGR, 1, 4, AR30, 1, 4, ABGR, 4)
|
||||
TESTPLANETOE(AR30, 1, 4, ARGB, 1, 4, ABGR, 4)
|
||||
@ -2929,7 +2929,7 @@ TESTPLANAR16TOB(H210, 2, 1, ARGB, 4, 4, 1, 2)
|
||||
TESTPLANAR16TOB(H210, 2, 1, ABGR, 4, 4, 1, 2)
|
||||
TESTPLANAR16TOB(U210, 2, 1, ARGB, 4, 4, 1, 2)
|
||||
TESTPLANAR16TOB(U210, 2, 1, ABGR, 4, 4, 1, 2)
|
||||
#ifdef LITTLE_ENDIAN_TEST
|
||||
#ifdef INTEL_TEST
|
||||
TESTPLANAR16TOB(I010, 2, 2, AR30, 4, 4, 1, 2)
|
||||
TESTPLANAR16TOB(I010, 2, 2, AB30, 4, 4, 1, 2)
|
||||
TESTPLANAR16TOB(H010, 2, 2, AR30, 4, 4, 1, 2)
|
||||
|
||||
@ -306,7 +306,9 @@ TEST_SCALETO(ARGBScale, 320, 240)
|
||||
TEST_SCALETO(ARGBScale, 569, 480)
|
||||
TEST_SCALETO(ARGBScale, 640, 360)
|
||||
TEST_SCALETO(ARGBScale, 1280, 720)
|
||||
#ifdef ENABLE_SLOW_TESTS
|
||||
TEST_SCALETO(ARGBScale, 1920, 1080)
|
||||
#endif // ENABLE_SLOW_TESTS
|
||||
#undef TEST_SCALETO1
|
||||
#undef TEST_SCALETO
|
||||
|
||||
|
||||
@ -500,7 +500,7 @@ static int I444TestFilter_16(int src_width,
|
||||
#define DX(x, nom, denom) static_cast<int>(((Abs(x) / nom + 1) / 2) * nom * 2)
|
||||
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
||||
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
#define TEST_FACTOR1(DISABLED_, name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
|
||||
int diff = I420TestFilter( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
@ -517,7 +517,7 @@ static int I444TestFilter_16(int src_width,
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_##I420ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = I420TestFilter_16( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
@ -525,7 +525,7 @@ static int I444TestFilter_16(int src_width,
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_##I444ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = I444TestFilter_16( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
@ -536,11 +536,19 @@ static int I444TestFilter_16(int src_width,
|
||||
|
||||
// Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
|
||||
// filtered output differs across the SSSE3, Neon and C fixed point versions.
|
||||
#ifdef ENABLE_SLOW_TESTS
|
||||
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
||||
TEST_FACTOR1(name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(name, Box, nom, denom, boxdiff)
|
||||
TEST_FACTOR1(, name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(, name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(, name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(, name, Box, nom, denom, boxdiff)
|
||||
#else
|
||||
#define TEST_FACTOR(name, nom, denom, boxdiff) \
|
||||
TEST_FACTOR1(DISABLED_, name, None, nom, denom, 0) \
|
||||
TEST_FACTOR1(DISABLED_, name, Linear, nom, denom, 3) \
|
||||
TEST_FACTOR1(DISABLED_, name, Bilinear, nom, denom, 3) \
|
||||
TEST_FACTOR1(DISABLED_, name, Box, nom, denom, boxdiff)
|
||||
#endif
|
||||
|
||||
TEST_FACTOR(2, 1, 2, 0)
|
||||
TEST_FACTOR(4, 1, 4, 0)
|
||||
@ -553,7 +561,7 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
#undef SX
|
||||
#undef DX
|
||||
|
||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
||||
#define TEST_SCALETO1(DISABLED_, name, width, height, filter, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
|
||||
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
|
||||
height, kFilter##filter, benchmark_iterations_, \
|
||||
@ -566,13 +574,13 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_##I420##name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = I420TestFilter_16( \
|
||||
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_##I444##name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = I444TestFilter_16( \
|
||||
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
@ -593,7 +601,7 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
I420##name##From##width##x##height##_##filter##_16) { \
|
||||
DISABLED_##I420##name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
@ -601,7 +609,7 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
I444##name##From##width##x##height##_##filter##_16) { \
|
||||
DISABLED_##I444##name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
@ -609,19 +617,30 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SLOW_TESTS
|
||||
// Test scale to a specified size with all 4 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(name, width, height, None, 0) \
|
||||
TEST_SCALETO1(name, width, height, Linear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Bilinear, 3) \
|
||||
TEST_SCALETO1(name, width, height, Box, 3)
|
||||
TEST_SCALETO1(, name, width, height, None, 0) \
|
||||
TEST_SCALETO1(, name, width, height, Linear, 3) \
|
||||
TEST_SCALETO1(, name, width, height, Bilinear, 3) \
|
||||
TEST_SCALETO1(, name, width, height, Box, 3)
|
||||
#else
|
||||
// Test scale to a specified size with all 4 filters.
|
||||
#define TEST_SCALETO(name, width, height) \
|
||||
TEST_SCALETO1(DISABLED_, name, width, height, None, 0) \
|
||||
TEST_SCALETO1(DISABLED_, name, width, height, Linear, 3) \
|
||||
TEST_SCALETO1(DISABLED_, name, width, height, Bilinear, 3) \
|
||||
TEST_SCALETO1(DISABLED_, name, width, height, Box, 3)
|
||||
#endif
|
||||
|
||||
TEST_SCALETO(Scale, 1, 1)
|
||||
TEST_SCALETO(Scale, 320, 240)
|
||||
TEST_SCALETO(Scale, 569, 480)
|
||||
TEST_SCALETO(Scale, 640, 360)
|
||||
TEST_SCALETO(Scale, 1280, 720)
|
||||
#ifdef ENABLE_SLOW_TESTS
|
||||
TEST_SCALETO(Scale, 1920, 1080)
|
||||
#endif // ENABLE_SLOW_TESTS
|
||||
#undef TEST_SCALETO1
|
||||
#undef TEST_SCALETO
|
||||
|
||||
@ -879,7 +898,7 @@ static int TestPlaneFilter_16(int src_width,
|
||||
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
||||
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \
|
||||
TEST_F(LibYUVScaleTest, DISABLED_##ScalePlaneDownBy##name##_##filter##_16) {\
|
||||
int diff = TestPlaneFilter_16( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user