Add 3x and 4x downsample tests for PlaneScale, UVScale and ARGBScale.

Intel SkylakeX
UVTest3x (1925 ms)
UVTest4x (2915 ms)
PlaneTest3x (2040 ms)
PlaneTest4x (4292 ms)
ARGBTest3x (2079 ms)
ARGBTest4x (1854 ms)

Pixel 2
ARGBTest3x (3602 ms)
ARGBTest4x (4064 ms)
PlaneTest3x (3331 ms)
PlaneTest4x (8977 ms)
UVTest3x (3473 ms)
UVTest4x (6970 ms)

Bug: b/171798872, b/171884264
Change-Id: Iebc70fed907857b6cb71a9baf2aba9861ef1e3f7
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/2505601
Reviewed-by: richard winterton <rrwinterton@gmail.com>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
This commit is contained in:
Frank Barchard 2020-10-28 11:31:10 -07:00 committed by Commit Bot
parent 5c4dc242f4
commit cec28e7088
11 changed files with 262 additions and 99 deletions

View File

@ -376,7 +376,9 @@ if (libyuv_include_tests) {
# sources
"util/i444tonv12_eg.cc",
]
deps = [ ":libyuv" ]
deps = [
":libyuv",
]
}
executable("cpuid") {

View File

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1766
Version: 1767
License: BSD
License File: LICENSE

View File

@ -460,9 +460,16 @@ int MJPGToNV21(const uint8_t* sample,
// JPEG to NV12
LIBYUV_API
int MJPGToNV12(const uint8_t* sample, size_t sample_size, uint8_t* dst_y,
int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv,
int src_width, int src_height, int dst_width, int dst_height);
int MJPGToNV12(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int src_width,
int src_height,
int dst_width,
int dst_height);
// Query size of MJPG in pixels.
LIBYUV_API

View File

@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1766
#define LIBYUV_VERSION 1767
#endif // INCLUDE_LIBYUV_VERSION_H_

View File

@ -328,8 +328,10 @@ int MJPGToNV21(const uint8_t* src_mjpg,
return ret ? 0 : 1;
}
static void JpegI420ToNV12(void* opaque, const uint8_t* const* data,
const int* strides, int rows) {
static void JpegI420ToNV12(void* opaque,
const uint8_t* const* data,
const int* strides,
int rows) {
NV21Buffers* dest = (NV21Buffers*)(opaque);
// Use NV21 with VU swapped.
I420ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
@ -339,8 +341,10 @@ static void JpegI420ToNV12(void* opaque, const uint8_t* const* data,
dest->h -= rows;
}
static void JpegI422ToNV12(void* opaque, const uint8_t* const* data,
const int* strides, int rows) {
static void JpegI422ToNV12(void* opaque,
const uint8_t* const* data,
const int* strides,
int rows) {
NV21Buffers* dest = (NV21Buffers*)(opaque);
// Use NV21 with VU swapped.
I422ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
@ -350,8 +354,10 @@ static void JpegI422ToNV12(void* opaque, const uint8_t* const* data,
dest->h -= rows;
}
static void JpegI444ToNV12(void* opaque, const uint8_t* const* data,
const int* strides, int rows) {
static void JpegI444ToNV12(void* opaque,
const uint8_t* const* data,
const int* strides,
int rows) {
NV21Buffers* dest = (NV21Buffers*)(opaque);
// Use NV21 with VU swapped.
I444ToNV21(data[0], strides[0], data[2], strides[2], data[1], strides[1],
@ -361,8 +367,10 @@ static void JpegI444ToNV12(void* opaque, const uint8_t* const* data,
dest->h -= rows;
}
static void JpegI400ToNV12(void* opaque, const uint8_t* const* data,
const int* strides, int rows) {
static void JpegI400ToNV12(void* opaque,
const uint8_t* const* data,
const int* strides,
int rows) {
NV21Buffers* dest = (NV21Buffers*)(opaque);
// Use NV21 since there is no UV plane.
I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu,
@ -374,9 +382,16 @@ static void JpegI400ToNV12(void* opaque, const uint8_t* const* data,
// MJPG (Motion JPEG) to NV12.
LIBYUV_API
int MJPGToNV12(const uint8_t* sample, size_t sample_size, uint8_t* dst_y,
int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv,
int src_width, int src_height, int dst_width, int dst_height) {
int MJPGToNV12(const uint8_t* sample,
size_t sample_size,
uint8_t* dst_y,
int dst_stride_y,
uint8_t* dst_uv,
int dst_stride_uv,
int src_width,
int src_height,
int dst_width,
int dst_height) {
if (sample_size == kUnknownDataSize) {
// ERROR: MJPEG frame size unknown
return -1;

View File

@ -1063,9 +1063,11 @@ void ScaleUVRowDown2Box_C(const uint8_t* src_uv,
int x;
for (x = 0; x < dst_width; ++x) {
dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
src_uv[src_stride + 2] + 2) >> 2;
src_uv[src_stride + 2] + 2) >>
2;
dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
src_uv[src_stride + 3] + 2) >> 2;
src_uv[src_stride + 3] + 2) >>
2;
src_uv += 4;
dst_uv += 2;
}

View File

@ -1370,8 +1370,9 @@ int FixedDiv1_X86(int num, int div) {
// Shuffle table for splitting UV into upper and lower part of register.
static const uvec8 kShuffleSplitUV = {0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u,
1u, 3u, 5u, 7u, 9u, 11u, 13u, 15u};
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u, 6u, 14u,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
static const uvec8 kShuffleMergeUV = {0u, 8u, 2u, 10u, 4u, 12u,
6u, 14u, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80};
void ScaleUVRowDown2Box_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,

View File

@ -456,4 +456,79 @@ TEST_F(LibYUVScaleTest, YUVToRGBScaleDown) {
EXPECT_LE(diff, 10);
}
// Scales a 48x3 image of 4-byte pixels down by 3x to 16x1 with ARGBScale and
// checks the first destination pixel for both bilinear and unfiltered modes.
//
// Source pixel i (row-major, 0..143) is filled with the bytes
// {i, 255 - i, i + 1, i + 10}. The expected value 49 is 1 * 48 + 1, i.e. the
// pixel at row 1, column 1: the center of the first 3x3 source block.
TEST_F(LibYUVScaleTest, ARGBTest3x) {
  const int kSrcStride = 48 * 4;
  const int kDstStride = 16 * 4;
  const int kSize = kSrcStride * 3;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < 48 * 3; ++i) {
    orig_pixels[i * 4 + 0] = i;
    orig_pixels[i * 4 + 1] = 255 - i;
    orig_pixels[i * 4 + 2] = i + 1;
    orig_pixels[i * 4 + 3] = i + 10;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // Each call produces 16 destination pixels, so scale the call count to match
  // the benchmark's pixel budget. Round the division up: truncating would give
  // zero calls when benchmark_width_ * benchmark_height_ < 16, and the
  // bilinear assertions below would then inspect a buffer this test never
  // wrote. (Assumes benchmark_iterations_ >= 1 -- TODO confirm in fixture.)
  int iterations16 = (benchmark_width_ * benchmark_height_ + (16 - 1)) / 16 *
                     benchmark_iterations_;
  for (int i = 0; i < iterations16; ++i) {
    ARGBScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
              kFilterBilinear);
  }

  // Bilinear 3x: first output pixel equals source pixel 49.
  EXPECT_EQ(49, dest_pixels[0]);
  EXPECT_EQ(255 - 49, dest_pixels[1]);
  EXPECT_EQ(50, dest_pixels[2]);
  EXPECT_EQ(59, dest_pixels[3]);

  // Unfiltered 3x: same source pixel is expected.
  ARGBScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
            kFilterNone);

  EXPECT_EQ(49, dest_pixels[0]);
  EXPECT_EQ(255 - 49, dest_pixels[1]);
  EXPECT_EQ(50, dest_pixels[2]);
  EXPECT_EQ(59, dest_pixels[3]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
// Scales a 64x4 image of 4-byte pixels down by 4x to 16x1 with ARGBScale and
// checks the first destination pixel for both bilinear and unfiltered modes.
//
// Source pixel i (row-major, 0..255) is filled with the bytes
// {i, 255 - i, i + 1, i + 10}, truncated to 8 bits on store. Pixels 65, 66,
// 129 and 130 are rows 1-2, columns 1-2: the central 2x2 of the first 4x4
// source block.
TEST_F(LibYUVScaleTest, ARGBTest4x) {
  const int kSrcStride = 64 * 4;
  const int kDstStride = 16 * 4;
  const int kSize = kSrcStride * 4;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < 64 * 4; ++i) {
    orig_pixels[i * 4 + 0] = i;
    orig_pixels[i * 4 + 1] = 255 - i;
    orig_pixels[i * 4 + 2] = i + 1;
    orig_pixels[i * 4 + 3] = i + 10;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // Each call produces 16 destination pixels, so scale the call count to match
  // the benchmark's pixel budget. Round the division up: truncating would give
  // zero calls when benchmark_width_ * benchmark_height_ < 16, and the
  // bilinear assertions below would then inspect a buffer this test never
  // wrote. (Assumes benchmark_iterations_ >= 1 -- TODO confirm in fixture.)
  int iterations16 = (benchmark_width_ * benchmark_height_ + (16 - 1)) / 16 *
                     benchmark_iterations_;
  for (int i = 0; i < iterations16; ++i) {
    ARGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
              kFilterBilinear);
  }

  // Bilinear 4x: expect roughly the rounded mean of the central 2x2 pixels,
  // within a tolerance of 4. Bytes 2 and 3 carry the +1 / +10 offset on each
  // of the four samples, hence the 1 * 4 and 10 * 4 terms.
  EXPECT_NEAR((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0], 4);
  EXPECT_NEAR((255 - 65 + 255 - 66 + 255 - 129 + 255 - 130 + 2) / 4,
              dest_pixels[1], 4);
  EXPECT_NEAR((1 * 4 + 65 + 66 + 129 + 130 + 2) / 4, dest_pixels[2], 4);
  EXPECT_NEAR((10 * 4 + 65 + 66 + 129 + 130 + 2) / 4, dest_pixels[3], 4);

  // Unfiltered 4x: expect source pixel 130 (row 2, column 2) exactly.
  ARGBScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
            kFilterNone);

  EXPECT_EQ(130, dest_pixels[0]);
  EXPECT_EQ(255 - 130, dest_pixels[1]);
  EXPECT_EQ(130 + 1, dest_pixels[2]);
  EXPECT_EQ(130 + 10, dest_pixels[3]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
} // namespace libyuv

View File

@ -46,8 +46,7 @@ static int UVTestFilter(int src_width,
}
MemRandomize(src_uv, src_uv_plane_size);
int64_t dst_uv_plane_size =
(dst_width + b * 2) * (dst_height + b * 2) * 2LL;
int64_t dst_uv_plane_size = (dst_width + b * 2) * (dst_height + b * 2) * 2LL;
int dst_stride_uv = (b * 2 + dst_width) * 2;
align_buffer_page_end(dst_uv_c, dst_uv_plane_size);
@ -61,28 +60,27 @@ static int UVTestFilter(int src_width,
// Warm up both versions for consistent benchmarks.
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
src_width, src_height, dst_uv_c + (dst_stride_uv * b) + b * 2,
dst_stride_uv, dst_width, dst_height, f);
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
dst_width, dst_height, f);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
src_width, src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2,
dst_stride_uv, dst_width, dst_height, f);
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
dst_width, dst_height, f);
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
double c_time = get_time();
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
src_width, src_height, dst_uv_c + (dst_stride_uv * b) + b * 2,
dst_stride_uv, dst_width, dst_height, f);
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
src_height, dst_uv_c + (dst_stride_uv * b) + b * 2, dst_stride_uv,
dst_width, dst_height, f);
c_time = (get_time() - c_time);
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
double opt_time = get_time();
for (i = 0; i < benchmark_iterations; ++i) {
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv,
src_width, src_height,
dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
UVScale(src_uv + (src_stride_uv * b) + b * 2, src_stride_uv, src_width,
src_height, dst_uv_opt + (dst_stride_uv * b) + b * 2, dst_stride_uv,
dst_width, dst_height, f);
}
opt_time = (get_time() - opt_time) / benchmark_iterations;
@ -178,4 +176,67 @@ TEST_SCALETO(UVScale, 1920, 1080)
#undef TEST_SCALETO1
#undef TEST_SCALETO
// Scales a 48x3 image of 2-byte pixels down by 3x to 16x1 with UVScale and
// checks the first destination pixel for both bilinear and unfiltered modes.
//
// Source pixel i (row-major, 0..143) is filled with the bytes {i, 255 - i}.
// The expected value 49 is 1 * 48 + 1, i.e. the pixel at row 1, column 1: the
// center of the first 3x3 source block.
TEST_F(LibYUVScaleTest, UVTest3x) {
  const int kSrcStride = 48 * 2;
  const int kDstStride = 16 * 2;
  const int kSize = kSrcStride * 3;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < 48 * 3; ++i) {
    orig_pixels[i * 2 + 0] = i;
    orig_pixels[i * 2 + 1] = 255 - i;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // Each call produces 16 destination pixels, so scale the call count to match
  // the benchmark's pixel budget. Round the division up: truncating would give
  // zero calls when benchmark_width_ * benchmark_height_ < 16, and the
  // bilinear assertions below would then inspect a buffer this test never
  // wrote. (Assumes benchmark_iterations_ >= 1 -- TODO confirm in fixture.)
  int iterations16 = (benchmark_width_ * benchmark_height_ + (16 - 1)) / 16 *
                     benchmark_iterations_;
  for (int i = 0; i < iterations16; ++i) {
    UVScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
            kFilterBilinear);
  }

  // Bilinear 3x: first output pixel equals source pixel 49.
  EXPECT_EQ(49, dest_pixels[0]);
  EXPECT_EQ(255 - 49, dest_pixels[1]);

  // Unfiltered 3x: same source pixel is expected.
  UVScale(orig_pixels, kSrcStride, 48, 3, dest_pixels, kDstStride, 16, 1,
          kFilterNone);

  EXPECT_EQ(49, dest_pixels[0]);
  EXPECT_EQ(255 - 49, dest_pixels[1]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
// Scales a 64x4 image of 2-byte pixels down by 4x to 16x1 with UVScale and
// checks the first destination pixel for both bilinear and unfiltered modes.
//
// Source pixel i (row-major, 0..255) is filled with the bytes {i, 255 - i}.
// Pixels 65, 66, 129 and 130 are rows 1-2, columns 1-2: the central 2x2 of
// the first 4x4 source block.
TEST_F(LibYUVScaleTest, UVTest4x) {
  const int kSrcStride = 64 * 2;
  const int kDstStride = 16 * 2;
  const int kSize = kSrcStride * 4;
  align_buffer_page_end(orig_pixels, kSize);
  for (int i = 0; i < 64 * 4; ++i) {
    orig_pixels[i * 2 + 0] = i;
    orig_pixels[i * 2 + 1] = 255 - i;
  }
  align_buffer_page_end(dest_pixels, kDstStride);

  // Each call produces 16 destination pixels, so scale the call count to match
  // the benchmark's pixel budget. Round the division up: truncating would give
  // zero calls when benchmark_width_ * benchmark_height_ < 16, and the
  // bilinear assertions below would then inspect a buffer this test never
  // wrote. (Assumes benchmark_iterations_ >= 1 -- TODO confirm in fixture.)
  int iterations16 = (benchmark_width_ * benchmark_height_ + (16 - 1)) / 16 *
                     benchmark_iterations_;
  for (int i = 0; i < iterations16; ++i) {
    UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
            kFilterBilinear);
  }

  // Bilinear 4x: expect exactly the rounded mean of the central 2x2 pixels.
  EXPECT_EQ((65 + 66 + 129 + 130 + 2) / 4, dest_pixels[0]);
  EXPECT_EQ((255 - 65 + 255 - 66 + 255 - 129 + 255 - 130 + 2) / 4,
            dest_pixels[1]);

  UVScale(orig_pixels, kSrcStride, 64, 4, dest_pixels, kDstStride, 16, 1,
          kFilterNone);

  EXPECT_EQ(130, dest_pixels[0]);  // expect the 3rd pixel of the 3rd row
  EXPECT_EQ(255 - 130, dest_pixels[1]);

  free_aligned_buffer_page_end(dest_pixels);
  free_aligned_buffer_page_end(orig_pixels);
}
} // namespace libyuv