mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
Add LIBYUV_API to NV12ToABGR and I444Rotate, I444Scale
Gaussian blur low levels ported to 32 bit neon. But they are not hooked up to anything but a unittest. Bug:b/248041731, b/132108021, b/129908793 Change-Id: Iccebb8ffd6b719810aa11dd770a525227da4c357 Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/1611206 Commit-Queue: Frank Barchard <fbarchard@chromium.org> Reviewed-by: Chong Zhang <chz@google.com>
This commit is contained in:
parent
05f72b8602
commit
681c6c6739
@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1727
|
||||
Version: 1730
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@ -256,6 +256,7 @@ int NV21ToARGB(const uint8_t* src_y,
|
||||
int height);
|
||||
|
||||
// Convert NV12 to ABGR.
|
||||
LIBYUV_API
|
||||
int NV12ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
|
||||
@ -126,6 +126,25 @@ int I444Scale(const uint8_t* src_y,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
LIBYUV_API
|
||||
int I444Scale_16(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
#ifdef __cplusplus
|
||||
// Legacy API. Deprecated.
|
||||
LIBYUV_API
|
||||
|
||||
@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1727
|
||||
#define LIBYUV_VERSION 1730
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@ -1793,8 +1793,9 @@ int NV21ToARGB(const uint8_t* src_y,
|
||||
}
|
||||
|
||||
// Convert NV12 to ABGR.
|
||||
// To output ABGR instead of ARGB swap the UV and use a mirrrored yuc matrix.
|
||||
// To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix.
|
||||
// To swap the UV use NV12 instead of NV21.
|
||||
LIBYUV_API
|
||||
int NV12ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
|
||||
@ -521,28 +521,19 @@ int I444Rotate(const uint8_t* src_y,
|
||||
CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
case libyuv::kRotate90:
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u,
|
||||
width, height);
|
||||
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v,
|
||||
width, height);
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
|
||||
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
case libyuv::kRotate270:
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u,
|
||||
width, height);
|
||||
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v,
|
||||
width, height);
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
|
||||
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
case libyuv::kRotate180:
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u,
|
||||
width, height);
|
||||
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v,
|
||||
width, height);
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
|
||||
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
|
||||
@ -2685,6 +2685,84 @@ void ByteToFloatRow_NEON(const uint8_t* src,
|
||||
: "cc", "memory", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
|
||||
void GaussCol_NEON(const uint16_t* src0,
|
||||
const uint16_t* src1,
|
||||
const uint16_t* src2,
|
||||
const uint16_t* src3,
|
||||
const uint16_t* src4,
|
||||
uint32_t* dst,
|
||||
int width) {
|
||||
asm volatile(
|
||||
"vmov.u16 d6, #4 \n" // constant 4
|
||||
"vmov.u16 d7, #6 \n" // constant 6
|
||||
|
||||
"1: \n"
|
||||
"vld1.16 {q1}, [%0]! \n" // load 8 samples, 5 rows
|
||||
"vld1.16 {q2}, [%4]! \n"
|
||||
"vaddl.u16 q0, d2, d4 \n" // * 1
|
||||
"vaddl.u16 q1, d3, d5 \n" // * 1
|
||||
"vld1.16 {q2}, [%1]! \n"
|
||||
"vmlal.u16 q0, d4, d6 \n" // * 4
|
||||
"vmlal.u16 q1, d5, d6 \n" // * 4
|
||||
"vld1.16 {q2}, [%2]! \n"
|
||||
"vmlal.u16 q0, d4, d7 \n" // * 6
|
||||
"vmlal.u16 q1, d5, d7 \n" // * 6
|
||||
"vld1.16 {q2}, [%3]! \n"
|
||||
"vmlal.u16 q0, d4, d6 \n" // * 4
|
||||
"vmlal.u16 q1, d5, d6 \n" // * 4
|
||||
"subs %6, %6, #8 \n" // 8 processed per loop
|
||||
"vst1.32 {q0, q1}, [%5]! \n" // store 8 samples
|
||||
"bgt 1b \n"
|
||||
: "+r"(src0), // %0
|
||||
"+r"(src1), // %1
|
||||
"+r"(src2), // %2
|
||||
"+r"(src3), // %3
|
||||
"+r"(src4), // %4
|
||||
"+r"(dst), // %5
|
||||
"+r"(width) // %6
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
|
||||
void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) {
|
||||
const uint32_t* src1 = src + 1;
|
||||
const uint32_t* src2 = src + 2;
|
||||
const uint32_t* src3 = src + 3;
|
||||
asm volatile(
|
||||
"vmov.u32 q10, #4 \n" // constant 4
|
||||
"vmov.u32 q11, #6 \n" // constant 6
|
||||
|
||||
"1: \n"
|
||||
"vld1.32 {q0, q1}, [%0]! \n" // load 12 source samples
|
||||
"vld1.32 {q2}, [%0] \n"
|
||||
"vadd.u32 q0, q0, q1 \n" // * 1
|
||||
"vadd.u32 q1, q1, q2 \n" // * 1
|
||||
"vld1.32 {q2, q3}, [%2]! \n"
|
||||
"vmla.u32 q0, q2, q11 \n" // * 6
|
||||
"vmla.u32 q1, q3, q11 \n" // * 6
|
||||
"vld1.32 {q2, q3}, [%1]! \n"
|
||||
"vld1.32 {q8, q9}, [%3]! \n"
|
||||
"vadd.u32 q2, q2, q8 \n" // add rows for * 4
|
||||
"vadd.u32 q3, q3, q9 \n"
|
||||
"vmla.u32 q0, q2, q10 \n" // * 4
|
||||
"vmla.u32 q1, q3, q10 \n" // * 4
|
||||
"subs %5, %5, #8 \n" // 8 processed per loop
|
||||
"vqshrn.u32 d0, q0, #8 \n" // round and pack
|
||||
"vqshrn.u32 d1, q1, #8 \n"
|
||||
"vst1.u16 {q0}, [%4]! \n" // store 8 samples
|
||||
"bgt 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(src1), // %1
|
||||
"+r"(src2), // %2
|
||||
"+r"(src3), // %3
|
||||
"+r"(dst), // %4
|
||||
"+r"(width) // %5
|
||||
:
|
||||
: "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
|
||||
}
|
||||
|
||||
// Convert biplanar NV21 to packed YUV24
|
||||
void NV21ToYUV24Row_NEON(const uint8_t* src_y,
|
||||
const uint8_t* src_vu,
|
||||
|
||||
@ -1824,6 +1824,39 @@ int I444Scale(const uint8_t* src_y,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scale the three planes (Y, U, V) of an image stored as 16 bit samples.
// All three planes use the same source and destination dimensions.
// Returns 0 on success, or -1 when an argument is rejected:
//   - any source or destination plane pointer is null,
//   - src_width or src_height is 0 or greater than 32768,
//   - dst_width or dst_height is not positive.
LIBYUV_API
int I444Scale_16(const uint16_t* src_y,
                 int src_stride_y,
                 const uint16_t* src_u,
                 int src_stride_u,
                 const uint16_t* src_v,
                 int src_stride_v,
                 int src_width,
                 int src_height,
                 uint16_t* dst_y,
                 int dst_stride_y,
                 uint16_t* dst_u,
                 int dst_stride_u,
                 uint16_t* dst_v,
                 int dst_stride_v,
                 int dst_width,
                 int dst_height,
                 enum FilterMode filtering) {
  // All six plane pointers are required.
  if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v) {
    return -1;
  }
  // Source dimensions must be non-zero and no larger than 32768.
  if (src_width == 0 || src_height == 0) {
    return -1;
  }
  if (src_width > 32768 || src_height > 32768) {
    return -1;
  }
  // Destination dimensions must be strictly positive.
  if (dst_width <= 0 || dst_height <= 0) {
    return -1;
  }

  // Y plane.
  ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
                dst_width, dst_height, filtering);
  // U plane.
  ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
                dst_width, dst_height, filtering);
  // V plane.
  ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
                dst_width, dst_height, filtering);
  return 0;
}
|
||||
|
||||
// Deprecated api
|
||||
LIBYUV_API
|
||||
int Scale(const uint8_t* src_y,
|
||||
|
||||
@ -3186,7 +3186,8 @@ TEST_F(LibYUVPlanarTest, TestGaussRow_Opt) {
|
||||
}
|
||||
GaussRow_C(&orig_pixels[0], &dst_pixels_c[0], 640);
|
||||
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
int has_neon = TestCpuFlag(kCpuHasNEON);
|
||||
if (has_neon) {
|
||||
GaussRow_NEON(&orig_pixels[0], &dst_pixels_opt[0], 640);
|
||||
@ -3239,7 +3240,8 @@ TEST_F(LibYUVPlanarTest, TestGaussCol_Opt) {
|
||||
&orig_pixels[640 * 3], &orig_pixels[640 * 4], &dst_pixels_c[0],
|
||||
640);
|
||||
for (int i = 0; i < benchmark_pixels_div1280_ * 2; ++i) {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
int has_neon = TestCpuFlag(kCpuHasNEON);
|
||||
if (has_neon) {
|
||||
GaussCol_NEON(&orig_pixels[0], &orig_pixels[640], &orig_pixels[640 * 2],
|
||||
|
||||
@ -135,6 +135,123 @@ TEST_F(LibYUVRotateTest, DISABLED_I420Rotate270_Odd) {
|
||||
benchmark_cpu_info_);
|
||||
}
|
||||
|
||||
static void I444TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
libyuv::RotationMode mode,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (src_width < 1) {
|
||||
src_width = 1;
|
||||
}
|
||||
if (src_height == 0) {
|
||||
src_height = 1;
|
||||
}
|
||||
if (dst_width < 1) {
|
||||
dst_width = 1;
|
||||
}
|
||||
if (dst_height < 1) {
|
||||
dst_height = 1;
|
||||
}
|
||||
int src_i444_y_size = src_width * Abs(src_height);
|
||||
int src_i444_uv_size = src_width * Abs(src_height);
|
||||
int src_i444_size = src_i444_y_size + src_i444_uv_size * 2;
|
||||
align_buffer_page_end(src_i444, src_i444_size);
|
||||
for (int i = 0; i < src_i444_size; ++i) {
|
||||
src_i444[i] = fastrand() & 0xff;
|
||||
}
|
||||
|
||||
int dst_i444_y_size = dst_width * dst_height;
|
||||
int dst_i444_uv_size = dst_width * dst_height;
|
||||
int dst_i444_size = dst_i444_y_size + dst_i444_uv_size * 2;
|
||||
align_buffer_page_end(dst_i444_c, dst_i444_size);
|
||||
align_buffer_page_end(dst_i444_opt, dst_i444_size);
|
||||
memset(dst_i444_c, 2, dst_i444_size);
|
||||
memset(dst_i444_opt, 3, dst_i444_size);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
|
||||
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
|
||||
dst_i444_c, dst_width, dst_i444_c + dst_i444_y_size, dst_width,
|
||||
dst_i444_c + dst_i444_y_size + dst_i444_uv_size, dst_width,
|
||||
src_width, src_height, mode);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (int i = 0; i < benchmark_iterations; ++i) {
|
||||
I444Rotate(src_i444, src_width, src_i444 + src_i444_y_size, src_width,
|
||||
src_i444 + src_i444_y_size + src_i444_uv_size, src_width,
|
||||
dst_i444_opt, dst_width, dst_i444_opt + dst_i444_y_size,
|
||||
dst_width, dst_i444_opt + dst_i444_y_size + dst_i444_uv_size,
|
||||
dst_width, src_width, src_height, mode);
|
||||
}
|
||||
|
||||
// Rotation should be exact.
|
||||
for (int i = 0; i < dst_i444_size; ++i) {
|
||||
EXPECT_EQ(dst_i444_c[i], dst_i444_opt[i]);
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(dst_i444_c);
|
||||
free_aligned_buffer_page_end(dst_i444_opt);
|
||||
free_aligned_buffer_page_end(src_i444);
|
||||
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I444Rotate0_Opt) {
  // No rotation: destination dimensions equal the source dimensions.
  const int rotated_width = benchmark_width_;
  const int rotated_height = benchmark_height_;
  I444TestRotate(benchmark_width_, benchmark_height_, rotated_width,
                 rotated_height, kRotate0, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I444Rotate90_Opt) {
  // Destination width and height are the source dimensions swapped.
  const int rotated_width = benchmark_height_;
  const int rotated_height = benchmark_width_;
  I444TestRotate(benchmark_width_, benchmark_height_, rotated_width,
                 rotated_height, kRotate90, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I444Rotate180_Opt) {
  // Destination dimensions equal the source dimensions.
  const int rotated_width = benchmark_width_;
  const int rotated_height = benchmark_height_;
  I444TestRotate(benchmark_width_, benchmark_height_, rotated_width,
                 rotated_height, kRotate180, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, I444Rotate270_Opt) {
  // Destination width and height are the source dimensions swapped.
  const int rotated_width = benchmark_height_;
  const int rotated_height = benchmark_width_;
  I444TestRotate(benchmark_width_, benchmark_height_, rotated_width,
                 rotated_height, kRotate270, benchmark_iterations_,
                 disable_cpu_flags_, benchmark_cpu_info_);
}
|
||||
|
||||
// TODO(fbarchard): Remove odd width tests.
|
||||
// Odd width tests work but disabled because they use C code and can be
|
||||
// tested by passing an odd width command line or environment variable.
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate0_Odd) {
  // Source is 3 narrower and 1 shorter than the benchmark size.
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_width, odd_height, kRotate0,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate90_Odd) {
  // Source is 3 narrower and 1 shorter than the benchmark size; the
  // destination uses the same two values swapped.
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_height, odd_width, kRotate90,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate180_Odd) {
  // Source is 3 narrower and 1 shorter than the benchmark size.
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_width, odd_height, kRotate180,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
|
||||
|
||||
TEST_F(LibYUVRotateTest, DISABLED_I444Rotate270_Odd) {
  // Source is 3 narrower and 1 shorter than the benchmark size; the
  // destination uses the same two values swapped.
  const int odd_width = benchmark_width_ - 3;
  const int odd_height = benchmark_height_ - 1;
  I444TestRotate(odd_width, odd_height, odd_height, odd_width, kRotate270,
                 benchmark_iterations_, disable_cpu_flags_,
                 benchmark_cpu_info_);
}
|
||||
|
||||
static void NV12TestRotate(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
|
||||
@ -22,14 +22,14 @@
|
||||
namespace libyuv {
|
||||
|
||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||
static int TestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
static int I420TestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
@ -141,14 +141,14 @@ static int TestFilter(int src_width,
|
||||
|
||||
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
|
||||
// 0 = exact.
|
||||
static int TestFilter_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
static int I420TestFilter_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
@ -256,6 +256,241 @@ static int TestFilter_16(int src_width,
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
|
||||
static int I444TestFilter(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int i, j;
|
||||
int src_width_uv = Abs(src_width);
|
||||
int src_height_uv = Abs(src_height);
|
||||
|
||||
int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
|
||||
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
|
||||
|
||||
int src_stride_y = Abs(src_width);
|
||||
int src_stride_uv = src_width_uv;
|
||||
|
||||
align_buffer_page_end(src_y, src_y_plane_size);
|
||||
align_buffer_page_end(src_u, src_uv_plane_size);
|
||||
align_buffer_page_end(src_v, src_uv_plane_size);
|
||||
if (!src_y || !src_u || !src_v) {
|
||||
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
|
||||
return 0;
|
||||
}
|
||||
MemRandomize(src_y, src_y_plane_size);
|
||||
MemRandomize(src_u, src_uv_plane_size);
|
||||
MemRandomize(src_v, src_uv_plane_size);
|
||||
|
||||
int dst_width_uv = dst_width;
|
||||
int dst_height_uv = dst_height;
|
||||
|
||||
int64_t dst_y_plane_size = (dst_width) * (dst_height);
|
||||
int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
|
||||
|
||||
int dst_stride_y = dst_width;
|
||||
int dst_stride_uv = dst_width_uv;
|
||||
|
||||
align_buffer_page_end(dst_y_c, dst_y_plane_size);
|
||||
align_buffer_page_end(dst_u_c, dst_uv_plane_size);
|
||||
align_buffer_page_end(dst_v_c, dst_uv_plane_size);
|
||||
align_buffer_page_end(dst_y_opt, dst_y_plane_size);
|
||||
align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
|
||||
align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
|
||||
if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
|
||||
!dst_v_opt) {
|
||||
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
double c_time = get_time();
|
||||
I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
|
||||
src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
|
||||
dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
|
||||
c_time = (get_time() - c_time);
|
||||
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
double opt_time = get_time();
|
||||
for (i = 0; i < benchmark_iterations; ++i) {
|
||||
I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
|
||||
src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
|
||||
dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
|
||||
f);
|
||||
}
|
||||
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||
// Report performance of C vs OPT.
|
||||
printf("filter %d - %8d us C - %8d us OPT\n", f,
|
||||
static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
|
||||
|
||||
// C version may be a little off from the optimized. Order of
|
||||
// operations may introduce rounding somewhere. So do a difference
|
||||
// of the buffers and look to see that the max difference is not
|
||||
// over 3.
|
||||
int max_diff = 0;
|
||||
for (i = 0; i < (dst_height); ++i) {
|
||||
for (j = 0; j < (dst_width); ++j) {
|
||||
int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
|
||||
dst_y_opt[(i * dst_stride_y) + j]);
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < (dst_height_uv); ++i) {
|
||||
for (j = 0; j < (dst_width_uv); ++j) {
|
||||
int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
|
||||
dst_u_opt[(i * dst_stride_uv) + j]);
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
|
||||
dst_v_opt[(i * dst_stride_uv) + j]);
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(dst_y_c);
|
||||
free_aligned_buffer_page_end(dst_u_c);
|
||||
free_aligned_buffer_page_end(dst_v_c);
|
||||
free_aligned_buffer_page_end(dst_y_opt);
|
||||
free_aligned_buffer_page_end(dst_u_opt);
|
||||
free_aligned_buffer_page_end(dst_v_opt);
|
||||
free_aligned_buffer_page_end(src_y);
|
||||
free_aligned_buffer_page_end(src_u);
|
||||
free_aligned_buffer_page_end(src_v);
|
||||
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
|
||||
// 0 = exact.
|
||||
static int I444TestFilter_16(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
FilterMode f,
|
||||
int benchmark_iterations,
|
||||
int disable_cpu_flags,
|
||||
int benchmark_cpu_info) {
|
||||
if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int i;
|
||||
int src_width_uv = Abs(src_width);
|
||||
int src_height_uv = Abs(src_height);
|
||||
|
||||
int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
|
||||
int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
|
||||
|
||||
int src_stride_y = Abs(src_width);
|
||||
int src_stride_uv = src_width_uv;
|
||||
|
||||
align_buffer_page_end(src_y, src_y_plane_size);
|
||||
align_buffer_page_end(src_u, src_uv_plane_size);
|
||||
align_buffer_page_end(src_v, src_uv_plane_size);
|
||||
align_buffer_page_end(src_y_16, src_y_plane_size * 2);
|
||||
align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
|
||||
align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
|
||||
if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
|
||||
printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
|
||||
return 0;
|
||||
}
|
||||
uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
|
||||
uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
|
||||
uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
|
||||
|
||||
MemRandomize(src_y, src_y_plane_size);
|
||||
MemRandomize(src_u, src_uv_plane_size);
|
||||
MemRandomize(src_v, src_uv_plane_size);
|
||||
|
||||
for (i = 0; i < src_y_plane_size; ++i) {
|
||||
p_src_y_16[i] = src_y[i];
|
||||
}
|
||||
for (i = 0; i < src_uv_plane_size; ++i) {
|
||||
p_src_u_16[i] = src_u[i];
|
||||
p_src_v_16[i] = src_v[i];
|
||||
}
|
||||
|
||||
int dst_width_uv = dst_width;
|
||||
int dst_height_uv = dst_height;
|
||||
|
||||
int dst_y_plane_size = (dst_width) * (dst_height);
|
||||
int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
|
||||
|
||||
int dst_stride_y = dst_width;
|
||||
int dst_stride_uv = dst_width_uv;
|
||||
|
||||
align_buffer_page_end(dst_y_8, dst_y_plane_size);
|
||||
align_buffer_page_end(dst_u_8, dst_uv_plane_size);
|
||||
align_buffer_page_end(dst_v_8, dst_uv_plane_size);
|
||||
align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
|
||||
align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
|
||||
align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
|
||||
|
||||
uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
|
||||
uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
|
||||
uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
|
||||
|
||||
MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
|
||||
I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
|
||||
src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
|
||||
dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
|
||||
MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
|
||||
for (i = 0; i < benchmark_iterations; ++i) {
|
||||
I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
|
||||
p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
|
||||
dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
|
||||
dst_stride_uv, dst_width, dst_height, f);
|
||||
}
|
||||
|
||||
// Expect an exact match.
|
||||
int max_diff = 0;
|
||||
for (i = 0; i < dst_y_plane_size; ++i) {
|
||||
int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < dst_uv_plane_size; ++i) {
|
||||
int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
|
||||
if (abs_diff > max_diff) {
|
||||
max_diff = abs_diff;
|
||||
}
|
||||
}
|
||||
|
||||
free_aligned_buffer_page_end(dst_y_8);
|
||||
free_aligned_buffer_page_end(dst_u_8);
|
||||
free_aligned_buffer_page_end(dst_v_8);
|
||||
free_aligned_buffer_page_end(dst_y_16);
|
||||
free_aligned_buffer_page_end(dst_u_16);
|
||||
free_aligned_buffer_page_end(dst_v_16);
|
||||
free_aligned_buffer_page_end(src_y);
|
||||
free_aligned_buffer_page_end(src_u);
|
||||
free_aligned_buffer_page_end(src_v);
|
||||
free_aligned_buffer_page_end(src_y_16);
|
||||
free_aligned_buffer_page_end(src_u_16);
|
||||
free_aligned_buffer_page_end(src_v_16);
|
||||
|
||||
return max_diff;
|
||||
}
|
||||
|
||||
// The following adjustments in dimensions ensure the scale factor will be
|
||||
// exactly achieved.
|
||||
// 2 is chroma subsample.
|
||||
@ -263,16 +498,32 @@ static int TestFilter_16(int src_width,
|
||||
#define SX(x, nom, denom) static_cast<int>(((x / nom + 1) / 2) * denom * 2)
|
||||
|
||||
#define TEST_FACTOR1(name, filter, nom, denom, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter) { \
|
||||
int diff = TestFilter( \
|
||||
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
|
||||
int diff = I420TestFilter( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = TestFilter_16( \
|
||||
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
|
||||
int diff = I444TestFilter( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = I420TestFilter_16( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
|
||||
int diff = I444TestFilter_16( \
|
||||
SX(benchmark_width_, nom, denom), SX(benchmark_height_, nom, denom), \
|
||||
DX(benchmark_width_, nom, denom), DX(benchmark_height_, nom, denom), \
|
||||
kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
|
||||
@ -300,30 +551,58 @@ TEST_FACTOR(3, 1, 3, 0)
|
||||
#undef DX
|
||||
|
||||
#define TEST_SCALETO1(name, width, height, filter, max_diff) \
|
||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter) { \
|
||||
int diff = TestFilter(benchmark_width_, benchmark_height_, width, height, \
|
||||
kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
|
||||
int diff = I420TestFilter(benchmark_width_, benchmark_height_, width, \
|
||||
height, kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter) { \
|
||||
int diff = TestFilter(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
|
||||
int diff = I444TestFilter(benchmark_width_, benchmark_height_, width, \
|
||||
height, kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = TestFilter_16(benchmark_width_, benchmark_height_, width, \
|
||||
height, kFilter##filter, benchmark_iterations_, \
|
||||
disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = I420TestFilter_16( \
|
||||
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
|
||||
int diff = I444TestFilter_16( \
|
||||
benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
|
||||
int diff = I420TestFilter(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
|
||||
int diff = I444TestFilter(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
I420##name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = I420TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
} \
|
||||
TEST_F(LibYUVScaleTest, \
|
||||
I444##name##From##width##x##height##_##filter##_16) { \
|
||||
int diff = I444TestFilter_16(width, height, Abs(benchmark_width_), \
|
||||
Abs(benchmark_height_), kFilter##filter, \
|
||||
benchmark_iterations_, disable_cpu_flags_, \
|
||||
benchmark_cpu_info_); \
|
||||
EXPECT_LE(diff, max_diff); \
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user