mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
PlaneScale, UVScale and ARGBScale test 3x and 4x down sample.
Intel SkylakeX: UVTest3x (1925 ms), UVTest4x (2915 ms), PlaneTest3x (2040 ms), PlaneTest4x (4292 ms), ARGBTest3x (2079 ms), ARGBTest4x (1854 ms).
Pixel 2: ARGBTest3x (3602 ms), ARGBTest4x (4064 ms), PlaneTest3x (3331 ms), PlaneTest4x (8977 ms), UVTest3x (3473 ms), UVTest4x (6970 ms).
Bug: b/171798872, b/171884264
Change-Id: Iebc70fed907857b6cb71a9baf2aba9861ef1e3f7
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2505601
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
parent
5c4dc242f4
commit
cec28e7088
4
BUILD.gn
4
BUILD.gn
@ -376,7 +376,9 @@ if (libyuv_include_tests) {
|
|||||||
# sources
|
# sources
|
||||||
"util/i444tonv12_eg.cc",
|
"util/i444tonv12_eg.cc",
|
||||||
]
|
]
|
||||||
deps = [ ":libyuv" ]
|
deps = [
|
||||||
|
":libyuv",
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
executable("cpuid") {
|
executable("cpuid") {
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1766
|
Version: 1767
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -460,9 +460,16 @@ int MJPGToNV21(const uint8_t* sample,
|
|||||||
|
|
||||||
// JPEG to NV12
|
// JPEG to NV12
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int MJPGToNV12(const uint8_t* sample, size_t sample_size, uint8_t* dst_y,
|
int MJPGToNV12(const uint8_t* sample,
|
||||||
int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv,
|
size_t sample_size,
|
||||||
int src_width, int src_height, int dst_width, int dst_height);
|
uint8_t* dst_y,
|
||||||
|
int dst_stride_y,
|
||||||
|
uint8_t* dst_uv,
|
||||||
|
int dst_stride_uv,
|
||||||
|
int src_width,
|
||||||
|
int src_height,
|
||||||
|
int dst_width,
|
||||||
|
int dst_height);
|
||||||
|
|
||||||
// Query size of MJPG in pixels.
|
// Query size of MJPG in pixels.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -79,7 +79,7 @@ int ARGBToAR30(const uint8_t* src_argb,
|
|||||||
|
|
||||||
// Aliases
|
// Aliases
|
||||||
#define ABGRToRGB24 ARGBToRAW
|
#define ABGRToRGB24 ARGBToRAW
|
||||||
#define ABGRToRAW ARGBToRGB24
|
#define ABGRToRAW ARGBToRGB24
|
||||||
|
|
||||||
// Convert ARGB To RGB24.
|
// Convert ARGB To RGB24.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
|
|||||||
@ -82,7 +82,7 @@ extern "C" {
|
|||||||
// The following are available for gcc/clang x86 platforms, but
|
// The following are available for gcc/clang x86 platforms, but
|
||||||
// require clang 3.4 or gcc 4.7.
|
// require clang 3.4 or gcc 4.7.
|
||||||
// TODO(fbarchard): Port to Visual C
|
// TODO(fbarchard): Port to Visual C
|
||||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||||
(defined(__x86_64__) || defined(__i386__)) && !defined(_MSC_VER) && \
|
(defined(__x86_64__) || defined(__i386__)) && !defined(_MSC_VER) && \
|
||||||
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
(defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
|
||||||
#define HAS_SCALEUVROWDOWN2BOX_AVX2
|
#define HAS_SCALEUVROWDOWN2BOX_AVX2
|
||||||
@ -848,17 +848,17 @@ void ScaleARGBRowDownEvenBox_Any_MMI(const uint8_t* src_ptr,
|
|||||||
|
|
||||||
// UV Row functions
|
// UV Row functions
|
||||||
void ScaleUVRowDown2_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDown2_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDown2Linear_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDown2Linear_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
|
void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
@ -952,15 +952,15 @@ void ScaleUVRowDown2Box_Any_MMI(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDownEven_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDownEven_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int src_stepx,
|
int src_stepx,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDownEvenBox_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDownEvenBox_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int src_stepx,
|
int src_stepx,
|
||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDownEven_NEON(const uint8_t* src_ptr,
|
void ScaleUVRowDownEven_NEON(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int src_stepx,
|
int src_stepx,
|
||||||
@ -992,15 +992,15 @@ void ScaleUVRowDownEvenBox_MMI(const uint8_t* src_ptr,
|
|||||||
uint8_t* dst_uv,
|
uint8_t* dst_uv,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDownEven_Any_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDownEven_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int src_stepx,
|
int src_stepx,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDownEvenBox_Any_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDownEvenBox_Any_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int src_stepx,
|
int src_stepx,
|
||||||
uint8_t* dst_ptr,
|
uint8_t* dst_ptr,
|
||||||
int dst_width);
|
int dst_width);
|
||||||
void ScaleUVRowDownEven_Any_NEON(const uint8_t* src_ptr,
|
void ScaleUVRowDownEven_Any_NEON(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
int src_stepx,
|
int src_stepx,
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1766
|
#define LIBYUV_VERSION 1767
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -328,8 +328,10 @@ int MJPGToNV21(const uint8_t* src_mjpg,
|
|||||||
return ret ? 0 : 1;
|
return ret ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void JpegI420ToNV12(void* opaque, const uint8_t* const* data,
|
static void JpegI420ToNV12(void* opaque,
|
||||||
const int* strides, int rows) {
|
const uint8_t* const* data,
|
||||||
|
const int* strides,
|
||||||
|
int rows) {
|
||||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||||
// Use NV21 with VU swapped.
|
// Use NV21 with VU swapped.
|
||||||
I420ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
|
I420ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
|
||||||
@ -339,8 +341,10 @@ static void JpegI420ToNV12(void* opaque, const uint8_t* const* data,
|
|||||||
dest->h -= rows;
|
dest->h -= rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void JpegI422ToNV12(void* opaque, const uint8_t* const* data,
|
static void JpegI422ToNV12(void* opaque,
|
||||||
const int* strides, int rows) {
|
const uint8_t* const* data,
|
||||||
|
const int* strides,
|
||||||
|
int rows) {
|
||||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||||
// Use NV21 with VU swapped.
|
// Use NV21 with VU swapped.
|
||||||
I422ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
|
I422ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
|
||||||
@ -350,8 +354,10 @@ static void JpegI422ToNV12(void* opaque, const uint8_t* const* data,
|
|||||||
dest->h -= rows;
|
dest->h -= rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void JpegI444ToNV12(void* opaque, const uint8_t* const* data,
|
static void JpegI444ToNV12(void* opaque,
|
||||||
const int* strides, int rows) {
|
const uint8_t* const* data,
|
||||||
|
const int* strides,
|
||||||
|
int rows) {
|
||||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||||
// Use NV21 with VU swapped.
|
// Use NV21 with VU swapped.
|
||||||
I444ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
|
I444ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
|
||||||
@ -361,8 +367,10 @@ static void JpegI444ToNV12(void* opaque, const uint8_t* const* data,
|
|||||||
dest->h -= rows;
|
dest->h -= rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void JpegI400ToNV12(void* opaque, const uint8_t* const* data,
|
static void JpegI400ToNV12(void* opaque,
|
||||||
const int* strides, int rows) {
|
const uint8_t* const* data,
|
||||||
|
const int* strides,
|
||||||
|
int rows) {
|
||||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||||
// Use NV21 since there is no UV plane.
|
// Use NV21 since there is no UV plane.
|
||||||
I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu,
|
I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu,
|
||||||
@ -374,9 +382,16 @@ static void JpegI400ToNV12(void* opaque, const uint8_t* const* data,
|
|||||||
|
|
||||||
// MJPG (Motion JPEG) to NV12.
|
// MJPG (Motion JPEG) to NV12.
|
||||||
LIBYUV_API
|
LIBYUV_API
|
||||||
int MJPGToNV12(const uint8_t* sample, size_t sample_size, uint8_t* dst_y,
|
int MJPGToNV12(const uint8_t* sample,
|
||||||
int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv,
|
size_t sample_size,
|
||||||
int src_width, int src_height, int dst_width, int dst_height) {
|
uint8_t* dst_y,
|
||||||
|
int dst_stride_y,
|
||||||
|
uint8_t* dst_uv,
|
||||||
|
int dst_stride_uv,
|
||||||
|
int src_width,
|
||||||
|
int src_height,
|
||||||
|
int dst_width,
|
||||||
|
int dst_height) {
|
||||||
if (sample_size == kUnknownDataSize) {
|
if (sample_size == kUnknownDataSize) {
|
||||||
// ERROR: MJPEG frame size unknown
|
// ERROR: MJPEG frame size unknown
|
||||||
return -1;
|
return -1;
|
||||||
|
|||||||
@ -1063,9 +1063,11 @@ void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
|
|||||||
int x;
|
int x;
|
||||||
for (x = 0; x < dst_width; ++x) {
|
for (x = 0; x < dst_width; ++x) {
|
||||||
dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
|
dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
|
||||||
src_uv[src_stride + 2] + 2) >> 2;
|
src_uv[src_stride + 2] + 2) >>
|
||||||
|
2;
|
||||||
dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
|
dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
|
||||||
src_uv[src_stride + 3] + 2) >> 2;
|
src_uv[src_stride + 3] + 2) >>
|
||||||
|
2;
|
||||||
src_uv += 4;
|
src_uv += 4;
|
||||||
dst_uv += 2;
|
dst_uv += 2;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1370,8 +1370,9 @@ int FixedDiv1_X86(int num, int div) {
|
|||||||
// Shuffle table for splitting UV into upper and lower part of register.
|
// Shuffle table for splitting UV into upper and lower part of register.
|
||||||
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
|
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
|
||||||
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
|
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
|
||||||
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u, 6u, 14u,
|
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u,
|
||||||
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
|
6u, 14u, 0x80, 0x80, 0x80, 0x80,
|
||||||
|
0x80, 0x80, 0x80, 0x80};
|
||||||
|
|
||||||
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
||||||
ptrdiff_t src_stride,
|
ptrdiff_t src_stride,
|
||||||
@ -1402,15 +1403,15 @@ void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
|
|||||||
"lea 0x8(%1),%1 \n" // 4 UV
|
"lea 0x8(%1),%1 \n" // 4 UV
|
||||||
"sub $0x4,%2 \n"
|
"sub $0x4,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)), // %3
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
"m"(kShuffleSplitUV), // %4
|
"m"(kShuffleSplitUV), // %4
|
||||||
"m"(kShuffleMergeUV) // %5
|
"m"(kShuffleMergeUV) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
#endif // HAS_SCALEUVROWDOWN2BOX_SSSE3
|
#endif // HAS_SCALEUVROWDOWN2BOX_SSSE3
|
||||||
|
|
||||||
#ifdef HAS_SCALEUVROWDOWN2BOX_AVX2
|
#ifdef HAS_SCALEUVROWDOWN2BOX_AVX2
|
||||||
void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
|
void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
|
||||||
@ -1444,15 +1445,15 @@ void ScaleUVRowDown2Box_AVX2(const uint8_t* src_ptr,
|
|||||||
"sub $0x8,%2 \n"
|
"sub $0x8,%2 \n"
|
||||||
"jg 1b \n"
|
"jg 1b \n"
|
||||||
"vzeroupper \n"
|
"vzeroupper \n"
|
||||||
: "+r"(src_ptr), // %0
|
: "+r"(src_ptr), // %0
|
||||||
"+r"(dst_ptr), // %1
|
"+r"(dst_ptr), // %1
|
||||||
"+r"(dst_width) // %2
|
"+r"(dst_width) // %2
|
||||||
: "r"((intptr_t)(src_stride)), // %3
|
: "r"((intptr_t)(src_stride)), // %3
|
||||||
"m"(kShuffleSplitUV), // %4
|
"m"(kShuffleSplitUV), // %4
|
||||||
"m"(kShuffleMergeUV) // %5
|
"m"(kShuffleMergeUV) // %5
|
||||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
|
||||||
}
|
}
|
||||||
#endif // HAS_SCALEUVROWDOWN2BOX_AVX2
|
#endif // HAS_SCALEUVROWDOWN2BOX_AVX2
|
||||||
|
|
||||||
#endif // defined(__x86_64__) || defined(__i386__)
|
#endif // defined(__x86_64__) || defined(__i386__)
|
||||||
|
|
||||||
|
|||||||
@ -456,4 +456,79 @@ TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
|
|||||||
EXPECT_LE(diff, 10);
|
EXPECT_LE(diff, 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVScaleTest, ARGBTest3x) {
|
||||||
|
const int kSrcStride = 48 * 4;
|
||||||
|
const int kDstStride = 16 * 4;
|
||||||
|
const int kSize = kSrcStride * 3;
|
||||||
|
align_buffer_page_end(orig_pixels, kSize);
|
||||||
|
for (int i = 0; i < 48 * 3; ++i) {
|
||||||
|
orig_pixels[i * 4 + 0] = i;
|
||||||
|
orig_pixels[i * 4 + 1] = 255 - i;
|
||||||
|
orig_pixels[i * 4 + 2] = i + 1;
|
||||||
|
orig_pixels[i * 4 + 3] = i + 10;
|
||||||
|
}
|
||||||
|
align_buffer_page_end(dest_pixels, kDstStride);
|
||||||
|
|
||||||
|
int iterations16 =
|
||||||
|
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
|
||||||
|
for (int i = 0; i < iterations16; ++i) {
|
||||||
|
ARGBScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterBilinear);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(49, dest_pixels[0]);
|
||||||
|
EXPECT_EQ(255 - 49, dest_pixels[1]);
|
||||||
|
EXPECT_EQ(50, dest_pixels[2]);
|
||||||
|
EXPECT_EQ(59, dest_pixels[3]);
|
||||||
|
|
||||||
|
ARGBScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterNone);
|
||||||
|
|
||||||
|
EXPECT_EQ(49, dest_pixels[0]);
|
||||||
|
EXPECT_EQ(255 - 49, dest_pixels[1]);
|
||||||
|
EXPECT_EQ(50, dest_pixels[2]);
|
||||||
|
EXPECT_EQ(59, dest_pixels[3]);
|
||||||
|
|
||||||
|
free_aligned_buffer_page_end(dest_pixels);
|
||||||
|
free_aligned_buffer_page_end(orig_pixels);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVScaleTest, ARGBTest4x) {
|
||||||
|
const int kSrcStride = 64 * 4;
|
||||||
|
const int kDstStride = 16 * 4;
|
||||||
|
const int kSize = kSrcStride * 4;
|
||||||
|
align_buffer_page_end(orig_pixels, kSize);
|
||||||
|
for (int i = 0; i < 64 * 4; ++i) {
|
||||||
|
orig_pixels[i * 4 + 0] = i;
|
||||||
|
orig_pixels[i * 4 + 1] = 255 - i;
|
||||||
|
orig_pixels[i * 4 + 2] = i + 1;
|
||||||
|
orig_pixels[i * 4 + 3] = i + 10;
|
||||||
|
}
|
||||||
|
align_buffer_page_end(dest_pixels, kDstStride);
|
||||||
|
|
||||||
|
int iterations16 =
|
||||||
|
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
|
||||||
|
for (int i = 0; i < iterations16; ++i) {
|
||||||
|
ARGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterBilinear);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_NEAR((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0], 4);
|
||||||
|
EXPECT_NEAR((255 - 65 + 255 - 66 + 255 - 129 + 255 - 130 + 2) / 4,
|
||||||
|
dest_pixels[1], 4);
|
||||||
|
EXPECT_NEAR((1 * 4 + 65 + 66 + 129 + 130 + 2) / 4, dest_pixels[2], 4);
|
||||||
|
EXPECT_NEAR((10 * 4 + 65 + 66 + 129 + 130 + 2) / 4, dest_pixels[3], 4);
|
||||||
|
|
||||||
|
ARGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterNone);
|
||||||
|
|
||||||
|
EXPECT_EQ(130, dest_pixels[0]);
|
||||||
|
EXPECT_EQ(255 - 130, dest_pixels[1]);
|
||||||
|
EXPECT_EQ(130 + 1, dest_pixels[2]);
|
||||||
|
EXPECT_EQ(130 + 10, dest_pixels[3]);
|
||||||
|
|
||||||
|
free_aligned_buffer_page_end(dest_pixels);
|
||||||
|
free_aligned_buffer_page_end(orig_pixels);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
@ -22,13 +22,13 @@ namespace libyuv {
|
|||||||
|
|
||||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||||
static int UVTestFilter(int src_width,
|
static int UVTestFilter(int src_width,
|
||||||
int src_height,
|
int src_height,
|
||||||
int dst_width,
|
int dst_width,
|
||||||
int dst_height,
|
int dst_height,
|
||||||
FilterMode f,
|
FilterMode f,
|
||||||
int benchmark_iterations,
|
int benchmark_iterations,
|
||||||
int disable_cpu_flags,
|
int disable_cpu_flags,
|
||||||
int benchmark_cpu_info) {
|
int benchmark_cpu_info) {
|
||||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -46,8 +46,7 @@ static int UVTestFilter(int src_width,
|
|||||||
}
|
}
|
||||||
MemRandomize(src_uv, src_uv_plane_size);
|
MemRandomize(src_uv, src_uv_plane_size);
|
||||||
|
|
||||||
int64_t dst_uv_plane_size =
|
int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL;
|
||||||
(dst_width + b * 2) * (dst_height + b * 2) * 2LL;
|
|
||||||
int dst_stride_uv = (b * 2 + dst_width) * 2;
|
int dst_stride_uv = (b * 2 + dst_width) * 2;
|
||||||
|
|
||||||
align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
|
align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
|
||||||
@ -61,29 +60,28 @@ static int UVTestFilter(int src_width,
|
|||||||
|
|
||||||
// Warm up both versions for consistent benchmarks.
|
// Warm up both versions for consistent benchmarks.
|
||||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||||
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
|
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
|
||||||
src_width, src_height, dst_uv_c + (dst_stride_uv * b) + b * 2,
|
src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
|
||||||
dst_stride_uv, dst_width, dst_height, f);
|
dst_width, dst_height, f);
|
||||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||||
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
|
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
|
||||||
src_width, src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2,
|
src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
|
||||||
dst_stride_uv, dst_width, dst_height, f);
|
dst_width, dst_height, f);
|
||||||
|
|
||||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||||
double c_time = get_time();
|
double c_time = get_time();
|
||||||
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
|
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
|
||||||
src_width, src_height, dst_uv_c + (dst_stride_uv * b) + b * 2,
|
src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
|
||||||
dst_stride_uv, dst_width, dst_height, f);
|
dst_width, dst_height, f);
|
||||||
|
|
||||||
c_time = (get_time() - c_time);
|
c_time = (get_time() - c_time);
|
||||||
|
|
||||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||||
double opt_time = get_time();
|
double opt_time = get_time();
|
||||||
for (i = 0; i < benchmark_iterations; ++i) {
|
for (i = 0; i < benchmark_iterations; ++i) {
|
||||||
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
|
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
|
||||||
src_width, src_height,
|
src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
|
||||||
dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
|
dst_width, dst_height, f);
|
||||||
dst_width, dst_height, f);
|
|
||||||
}
|
}
|
||||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||||
|
|
||||||
@ -118,8 +116,8 @@ static int UVTestFilter(int src_width,
|
|||||||
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
|
#define SX(x, nom, denom) static_cast<int>((x / nom) * denom)
|
||||||
|
|
||||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||||
TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
|
TEST_F(LibYUVScaleTest, UVScaleDownBy##name##_##filter) { \
|
||||||
int diff = UVTestFilter( \
|
int diff = UVTestFilter( \
|
||||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||||
@ -146,19 +144,19 @@ TEST_FACTOR(3, 1, 3)
|
|||||||
#undef SX
|
#undef SX
|
||||||
#undef DX
|
#undef DX
|
||||||
|
|
||||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
||||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
||||||
int diff = UVTestFilter(benchmark_width_, benchmark_height_, width, \
|
int diff = UVTestFilter(benchmark_width_, benchmark_height_, width, \
|
||||||
height, kFilter##filter, benchmark_iterations_, \
|
height, kFilter##filter, benchmark_iterations_, \
|
||||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
} \
|
} \
|
||||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
||||||
int diff = UVTestFilter(width, height, Abs(benchmark_width_), \
|
int diff = UVTestFilter(width, height, Abs(benchmark_width_), \
|
||||||
Abs(benchmark_height_), kFilter##filter, \
|
Abs(benchmark_height_), kFilter##filter, \
|
||||||
benchmark_iterations_, disable_cpu_flags_, \
|
benchmark_iterations_, disable_cpu_flags_, \
|
||||||
benchmark_cpu_info_); \
|
benchmark_cpu_info_); \
|
||||||
EXPECT_LE(diff, max_diff); \
|
EXPECT_LE(diff, max_diff); \
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Test scale to a specified size with all 4 filters.
|
/// Test scale to a specified size with all 4 filters.
|
||||||
@ -178,4 +176,67 @@ TEST_SCALETO(UVScale, 1920, 1080)
|
|||||||
#undef TEST_SCALETO1
|
#undef TEST_SCALETO1
|
||||||
#undef TEST_SCALETO
|
#undef TEST_SCALETO
|
||||||
|
|
||||||
|
TEST_F(LibYUVScaleTest, UVTest3x) {
|
||||||
|
const int kSrcStride = 48 * 2;
|
||||||
|
const int kDstStride = 16 * 2;
|
||||||
|
const int kSize = kSrcStride * 3;
|
||||||
|
align_buffer_page_end(orig_pixels, kSize);
|
||||||
|
for (int i = 0; i < 48 * 3; ++i) {
|
||||||
|
orig_pixels[i * 2 + 0] = i;
|
||||||
|
orig_pixels[i * 2 + 1] = 255 - i;
|
||||||
|
}
|
||||||
|
align_buffer_page_end(dest_pixels, kDstStride);
|
||||||
|
|
||||||
|
int iterations16 =
|
||||||
|
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
|
||||||
|
for (int i = 0; i < iterations16; ++i) {
|
||||||
|
UVScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterBilinear);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ(49, dest_pixels[0]);
|
||||||
|
EXPECT_EQ(255 - 49, dest_pixels[1]);
|
||||||
|
|
||||||
|
UVScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterNone);
|
||||||
|
|
||||||
|
EXPECT_EQ(49, dest_pixels[0]);
|
||||||
|
EXPECT_EQ(255 - 49, dest_pixels[1]);
|
||||||
|
|
||||||
|
free_aligned_buffer_page_end(dest_pixels);
|
||||||
|
free_aligned_buffer_page_end(orig_pixels);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVScaleTest, UVTest4x) {
|
||||||
|
const int kSrcStride = 64 * 2;
|
||||||
|
const int kDstStride = 16 * 2;
|
||||||
|
const int kSize = kSrcStride * 4;
|
||||||
|
align_buffer_page_end(orig_pixels, kSize);
|
||||||
|
for (int i = 0; i < 64 * 4; ++i) {
|
||||||
|
orig_pixels[i * 2 + 0] = i;
|
||||||
|
orig_pixels[i * 2 + 1] = 255 - i;
|
||||||
|
}
|
||||||
|
align_buffer_page_end(dest_pixels, kDstStride);
|
||||||
|
|
||||||
|
int iterations16 =
|
||||||
|
benchmark_width_ * benchmark_height_ / (16 * 1) * benchmark_iterations_;
|
||||||
|
for (int i = 0; i < iterations16; ++i) {
|
||||||
|
UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterBilinear);
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPECT_EQ((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0]);
|
||||||
|
EXPECT_EQ((255 - 65 + 255 - 66 + 255 - 129 + 255 - 130 + 2) / 4,
|
||||||
|
dest_pixels[1]);
|
||||||
|
|
||||||
|
UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
|
||||||
|
kFilterNone);
|
||||||
|
|
||||||
|
EXPECT_EQ(130, dest_pixels[0]); // expect the 3rd pixel of the 3rd row
|
||||||
|
EXPECT_EQ(255 - 130, dest_pixels[1]);
|
||||||
|
|
||||||
|
free_aligned_buffer_page_end(dest_pixels);
|
||||||
|
free_aligned_buffer_page_end(orig_pixels);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace libyuv
|
} // namespace libyuv
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user