mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-07 01:06:46 +08:00
HalfFloatPlane unittest for denormal half floats
Half floats have a limited range. It shouldn't normally come up, but if the scale value passed in produces a small value, the half floats will be denormals, which are slow and/or flushed to zero. This test ensures they behave the same in C and SIMD and tests the performance of denormals. TEST=TestHalfFloatPlane_denormal BUG=libyuv:560 R=hubbe@chromium.org Review URL: https://codereview.chromium.org/2424233004 .
This commit is contained in:
parent
78c58ab8aa
commit
f553db2d30
12
BUILD.gn
12
BUILD.gn
@ -122,6 +122,10 @@ static_library("libyuv") {
|
|||||||
# Enable optimize for speed (-O2) over size (-Os).
|
# Enable optimize for speed (-O2) over size (-Os).
|
||||||
configs += [ "//build/config/compiler:optimize_max" ]
|
configs += [ "//build/config/compiler:optimize_max" ]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# To enable AVX2 or other cpu optimization, pass flag here
|
||||||
|
# cflags = [ "-mavx2" ]
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (libyuv_use_neon) {
|
if (libyuv_use_neon) {
|
||||||
@ -140,6 +144,14 @@ if (libyuv_use_neon) {
|
|||||||
|
|
||||||
public_configs = [ ":libyuv_config" ]
|
public_configs = [ ":libyuv_config" ]
|
||||||
|
|
||||||
|
# Always enable optimization for Release and NaCl builds (to workaround
|
||||||
|
# crbug.com/538243).
|
||||||
|
if (!is_debug) {
|
||||||
|
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||||
|
# Enable optimize for speed (-O2) over size (-Os).
|
||||||
|
configs += [ "//build/config/compiler:optimize_max" ]
|
||||||
|
}
|
||||||
|
|
||||||
if (current_cpu != "arm64") {
|
if (current_cpu != "arm64") {
|
||||||
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
|
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
|
||||||
cflags = [ "-mfpu=neon" ]
|
cflags = [ "-mfpu=neon" ]
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
Name: libyuv
|
Name: libyuv
|
||||||
URL: http://code.google.com/p/libyuv/
|
URL: http://code.google.com/p/libyuv/
|
||||||
Version: 1627
|
Version: 1628
|
||||||
License: BSD
|
License: BSD
|
||||||
License File: LICENSE
|
License File: LICENSE
|
||||||
|
|
||||||
|
|||||||
@ -201,7 +201,7 @@ extern "C" {
|
|||||||
#define HAS_COPYROW_AVX
|
#define HAS_COPYROW_AVX
|
||||||
#define HAS_H422TOARGBROW_AVX2
|
#define HAS_H422TOARGBROW_AVX2
|
||||||
#define HAS_HALFFLOATROW_AVX2
|
#define HAS_HALFFLOATROW_AVX2
|
||||||
// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
|
// #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast
|
||||||
#define HAS_I400TOARGBROW_AVX2
|
#define HAS_I400TOARGBROW_AVX2
|
||||||
#define HAS_I422TOARGB1555ROW_AVX2
|
#define HAS_I422TOARGB1555ROW_AVX2
|
||||||
#define HAS_I422TOARGB4444ROW_AVX2
|
#define HAS_I422TOARGB4444ROW_AVX2
|
||||||
@ -330,6 +330,11 @@ extern "C" {
|
|||||||
#define HAS_YUY2TOUVROW_NEON
|
#define HAS_YUY2TOUVROW_NEON
|
||||||
#define HAS_YUY2TOYROW_NEON
|
#define HAS_YUY2TOYROW_NEON
|
||||||
|
|
||||||
|
// TODO(fbarchard): Port to 32 bit.
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
#define HAS_HALFFLOATROW_NEON
|
||||||
|
#endif
|
||||||
|
|
||||||
// Effects:
|
// Effects:
|
||||||
#define HAS_ARGBADDROW_NEON
|
#define HAS_ARGBADDROW_NEON
|
||||||
#define HAS_ARGBATTENUATEROW_NEON
|
#define HAS_ARGBATTENUATEROW_NEON
|
||||||
@ -1954,6 +1959,9 @@ void HalfFloatRow_Any_AVX2(const uint16* src, uint16* dst, float scale,
|
|||||||
void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width);
|
void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width);
|
||||||
void HalfFloatRow_Any_F16C(const uint16* src, uint16* dst, float scale,
|
void HalfFloatRow_Any_F16C(const uint16* src, uint16* dst, float scale,
|
||||||
int width);
|
int width);
|
||||||
|
void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width);
|
||||||
|
void HalfFloatRow_Any_NEON(const uint16* src, uint16* dst, float scale,
|
||||||
|
int width);
|
||||||
|
|
||||||
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
|
void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
|
||||||
const uint8* luma, uint32 lumacoeff);
|
const uint8* luma, uint32 lumacoeff);
|
||||||
|
|||||||
@ -11,6 +11,6 @@
|
|||||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||||
#define INCLUDE_LIBYUV_VERSION_H_
|
#define INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|
||||||
#define LIBYUV_VERSION 1627
|
#define LIBYUV_VERSION 1628
|
||||||
|
|
||||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||||
|
|||||||
@ -2585,6 +2585,15 @@ int HalfFloatPlane(const uint16* src_y, int src_stride_y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(HAS_HALFFLOATROW_NEON)
|
||||||
|
if (TestCpuFlag(kCpuHasNEON)) {
|
||||||
|
HalfFloatRow = HalfFloatRow_Any_NEON;
|
||||||
|
if (IS_ALIGNED(width, 8)) {
|
||||||
|
HalfFloatRow = HalfFloatRow_NEON;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
for (y = 0; y < height; ++y) {
|
for (y = 0; y < height; ++y) {
|
||||||
HalfFloatRow(src_y, dst_y, scale, width);
|
HalfFloatRow(src_y, dst_y, scale, width);
|
||||||
src_y += src_stride_y;
|
src_y += src_stride_y;
|
||||||
|
|||||||
@ -585,6 +585,9 @@ ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, float, 1, 1, 15)
|
|||||||
#ifdef HAS_HALFFLOATROW_F16C
|
#ifdef HAS_HALFFLOATROW_F16C
|
||||||
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15)
|
ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, float, 1, 1, 15)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAS_HALFFLOATROW_NEON
|
||||||
|
ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, float, 1, 1, 7)
|
||||||
|
#endif
|
||||||
#undef ANY11P16
|
#undef ANY11P16
|
||||||
|
|
||||||
// Any 1 to 1 with yuvconstants
|
// Any 1 to 1 with yuvconstants
|
||||||
|
|||||||
@ -2710,6 +2710,32 @@ void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
|
|||||||
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
: "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) {
|
||||||
|
asm volatile (
|
||||||
|
"1: \n"
|
||||||
|
MEMACCESS(0)
|
||||||
|
"ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts
|
||||||
|
"subs %w2, %w2, #8 \n" // 8 pixels per loop
|
||||||
|
"uxtl v2.4s, v1.4h \n" // 8 int's
|
||||||
|
"uxtl2 v1.4s, v1.8h \n"
|
||||||
|
"scvtf v2.4s, v2.4s \n" // 8 floats
|
||||||
|
"scvtf v1.4s, v1.4s \n"
|
||||||
|
"fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent
|
||||||
|
"fmul v1.4s, v1.4s, %3.s[0] \n"
|
||||||
|
"uqshrn v4.4h, v2.4s, #13 \n" // isolate halffloat
|
||||||
|
"uqshrn2 v4.8h, v1.4s, #13 \n"
|
||||||
|
MEMACCESS(1)
|
||||||
|
"st1 {v4.16b}, [%1], #16 \n" // store 8 shorts
|
||||||
|
"b.gt 1b \n"
|
||||||
|
: "+r"(src), // %0
|
||||||
|
"+r"(dst), // %1
|
||||||
|
"+r"(width) // %2
|
||||||
|
: "w"(scale * 1.9259299444e-34f) // %3
|
||||||
|
: "cc", "memory", "v1", "v2", "v4"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|||||||
@ -2081,9 +2081,12 @@ TEST_F(LibYUVPlanarTest, TestARGBPolynomial) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane) {
|
int TestHalfFloatPlane(int benchmark_width, int benchmark_height,
|
||||||
|
int benchmark_iterations,
|
||||||
|
int disable_cpu_flags, int benchmark_cpu_info,
|
||||||
|
float scale) {
|
||||||
int i, j;
|
int i, j;
|
||||||
const int y_plane_size = benchmark_width_ * benchmark_height_ * 2;
|
const int y_plane_size = benchmark_width * benchmark_height * 2;
|
||||||
|
|
||||||
align_buffer_page_end(orig_y, y_plane_size);
|
align_buffer_page_end(orig_y, y_plane_size);
|
||||||
align_buffer_page_end(dst_c, y_plane_size);
|
align_buffer_page_end(dst_c, y_plane_size);
|
||||||
@ -2093,32 +2096,62 @@ TEST_F(LibYUVPlanarTest, TestHalfFloatPlane) {
|
|||||||
memset(dst_opt, 1, y_plane_size);
|
memset(dst_opt, 1, y_plane_size);
|
||||||
|
|
||||||
// Disable all optimizations.
|
// Disable all optimizations.
|
||||||
MaskCpuFlags(disable_cpu_flags_);
|
MaskCpuFlags(disable_cpu_flags);
|
||||||
double c_time = get_time();
|
double c_time = get_time();
|
||||||
for (j = 0; j < benchmark_iterations_; j++) {
|
for (j = 0; j < benchmark_iterations; j++) {
|
||||||
HalfFloatPlane((uint16*)orig_y, benchmark_width_ * 2,
|
HalfFloatPlane((uint16*)orig_y, benchmark_width * 2,
|
||||||
(uint16*)dst_c, benchmark_width_ * 2,
|
(uint16*)dst_c, benchmark_width * 2,
|
||||||
1.0f / 4096.0f, benchmark_width_, benchmark_height_);
|
scale, benchmark_width, benchmark_height);
|
||||||
}
|
}
|
||||||
c_time = (get_time() - c_time) / benchmark_iterations_;
|
c_time = (get_time() - c_time) / benchmark_iterations;
|
||||||
|
|
||||||
// Enable optimizations.
|
// Enable optimizations.
|
||||||
MaskCpuFlags(benchmark_cpu_info_);
|
MaskCpuFlags(benchmark_cpu_info);
|
||||||
double opt_time = get_time();
|
double opt_time = get_time();
|
||||||
for (j = 0; j < benchmark_iterations_; j++) {
|
for (j = 0; j < benchmark_iterations; j++) {
|
||||||
HalfFloatPlane((uint16*)orig_y, benchmark_width_ * 2,
|
HalfFloatPlane((uint16*)orig_y, benchmark_width * 2,
|
||||||
(uint16*)dst_opt, benchmark_width_ * 2,
|
(uint16*)dst_opt, benchmark_width * 2,
|
||||||
1.0f / 4096.0f, benchmark_width_, benchmark_height_);
|
scale, benchmark_width, benchmark_height);
|
||||||
}
|
}
|
||||||
opt_time = (get_time() - opt_time) / benchmark_iterations_;
|
opt_time = (get_time() - opt_time) / benchmark_iterations;
|
||||||
|
|
||||||
|
int diff = 0;
|
||||||
for (i = 0; i < y_plane_size; ++i) {
|
for (i = 0; i < y_plane_size; ++i) {
|
||||||
EXPECT_EQ(dst_c[i], dst_opt[i]);
|
diff = dst_c[i] - dst_opt[i];
|
||||||
|
if (diff) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
free_aligned_buffer_page_end(orig_y);
|
free_aligned_buffer_page_end(orig_y);
|
||||||
free_aligned_buffer_page_end(dst_c);
|
free_aligned_buffer_page_end(dst_c);
|
||||||
free_aligned_buffer_page_end(dst_opt);
|
free_aligned_buffer_page_end(dst_opt);
|
||||||
|
return diff;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
|
||||||
|
// exponent to be less than 0. 15 - log2(65536) = -1. This shouldn't normally
|
||||||
|
// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_denormal) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f / 65536.0f);
|
||||||
|
EXPECT_EQ(diff, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f / 4096.0f);
|
||||||
|
EXPECT_EQ(diff, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
|
||||||
|
int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
|
||||||
|
benchmark_iterations_,
|
||||||
|
disable_cpu_flags_, benchmark_cpu_info_,
|
||||||
|
1.0f / 1023.0f);
|
||||||
|
EXPECT_EQ(diff, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
|
TEST_F(LibYUVPlanarTest, TestARGBLumaColorTable) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user