mirror of
https://chromium.googlesource.com/libyuv/libyuv
synced 2025-12-06 08:46:47 +08:00
HalfFloat: fix SIGILL on aarch64
- Remove the special case for scale of 1, which used fp16 cvt instructions that require a cpuid check
- Port the aarch64 kernel to aarch32
- Use C on aarch32 for small (denormal) scale values

Bug: 377693555
Change-Id: I38e207e79ac54907ed6e65118b8109288fddb207
Reviewed-on: https://chromium-review.googlesource.com/c/libyuv/libyuv/+/6043392
Reviewed-by: Wan-Teh Chang <wtc@google.com>
This commit is contained in:
parent
307b951229
commit
595146434a
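Note: both the removed HalfFloat1Row kernels and the surviving HalfFloatRow kernels rely on the same bit trick: widen each 16-bit pixel to float32, multiply by scale * 2^-112, and keep the top of the float32 bit pattern. The 2^-112 factor (the 1.9259299444e-34f constant in the asm below) rebiases the float32 exponent (bias 127) down to the half-float bias (15), and a 13-bit right shift drops the extra mantissa bits (23 - 10 = 13). A minimal scalar sketch of the idea, not libyuv's actual C path:

    #include <stdint.h>
    #include <string.h>

    // Illustrative scalar version of the NEON math: pixel -> float32,
    // rebias by 2^-112, then the top of the float32 bit pattern is the
    // IEEE half-float, truncated.  Valid while scale * 2^-112 stays a
    // normal float32 (see the denormal guard added in HalfFloatPlane).
    static uint16_t ToHalfFloat(uint16_t pixel, float scale) {
      float f = (float)pixel * (scale * 1.9259299444e-34f);  // 2^-112
      uint32_t bits;
      memcpy(&bits, &f, sizeof(bits));  // portable type pun
      return (uint16_t)(bits >> 13);
    }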
@@ -6670,14 +6670,6 @@ void HalfFloatRow_SVE2(const uint16_t* src,
                        uint16_t* dst,
                        float scale,
                        int width);
-void HalfFloat1Row_NEON(const uint16_t* src,
-                        uint16_t* dst,
-                        float scale,
-                        int width);
-void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr,
-                            uint16_t* dst_ptr,
-                            float param,
-                            int width);
 void HalfFloat1Row_SVE2(const uint16_t* src,
                         uint16_t* dst,
                         float scale,
@@ -5208,11 +5208,18 @@ int HalfFloatPlane(const uint16_t* src_y,
   }
 #endif
 #if defined(HAS_HALFFLOATROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    HalfFloatRow =
-        scale == 1.0f ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
+  if (TestCpuFlag(kCpuHasNEON)
+#if defined(__arm__)
+      // When scale is 1/65535 the scale * 2^-112 used to convert is a denormal.
+      // But when Neon vmul is asked to multiply a normal float by that
+      // denormal scale, even though the result would have been normal, it
+      // flushes to zero. The scalar version of vmul supports denormals.
+      && scale >= 1.0f / 4096.0f
+#endif
+  ) {
+    HalfFloatRow = HalfFloatRow_Any_NEON;
     if (IS_ALIGNED(width, 16)) {
-      HalfFloatRow = scale == 1.0f ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
+      HalfFloatRow = HalfFloatRow_NEON;
     }
   }
 #endif
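Note: the 1/4096 cutoff leaves margin. The kernel's combined multiplier is scale * 2^-112, which stays a normal float32 only while it is >= FLT_MIN = 2^-126, i.e. scale >= 2^-14 (1/16384). A standalone check of the interesting scales (assumes the host FPU is not itself flushing denormals):

    #include <float.h>
    #include <stdio.h>

    // Sanity check (not from libyuv): the combined multiplier
    // scale * 2^-112 must stay a normal float32, i.e. >= FLT_MIN (2^-126).
    int main(void) {
      const float kRebias = 1.9259299444e-34f;  // 2^-112
      const float scales[] = {1.0f / 4096.0f, 1.0f / 16384.0f, 1.0f / 65535.0f};
      for (int i = 0; i < 3; ++i) {
        float mult = scales[i] * kRebias;
        printf("scale=%g -> mult=%g %s\n", (double)scales[i], (double)mult,
               mult >= FLT_MIN ? "normal" : "denormal");
      }
      return 0;
    }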
@@ -1813,25 +1813,7 @@ ANY11P16(HalfFloat1Row_Any_F16C,
          15)
 #endif
 #ifdef HAS_HALFFLOATROW_NEON
-#ifdef __aarch64__
 ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 15)
-ANY11P16(HalfFloat1Row_Any_NEON,
-         HalfFloat1Row_NEON,
-         uint16_t,
-         uint16_t,
-         2,
-         2,
-         15)
-#else
-ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7)
-ANY11P16(HalfFloat1Row_Any_NEON,
-         HalfFloat1Row_NEON,
-         uint16_t,
-         uint16_t,
-         2,
-         2,
-         7)
-#endif
 #endif
 #ifdef HAS_HALFFLOATROW_MSA
 ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31)
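Note: the last ANY11P16 argument is a width mask, i.e. the SIMD kernel's pixels-per-iteration minus one; it becomes 15 on both architectures because the reworked aarch32 kernel below now also processes 16 pixels per loop. Roughly, the generated Any wrapper behaves like this sketch (simplified from libyuv's macro; the names here are illustrative):

    #include <stdint.h>
    #include <string.h>

    typedef void (*HalfFloatRowFn)(const uint16_t* src, uint16_t* dst,
                                   float scale, int width);

    // Simplified model of an ANY11P16-style wrapper: run the SIMD kernel
    // on the multiple-of-16 prefix, then route the ragged tail through a
    // stack temp so the kernel never reads or writes past the row end.
    static void HalfFloatRowAny(HalfFloatRowFn simd_row, const uint16_t* src,
                                uint16_t* dst, float scale, int width) {
      enum { kMask = 15 };     // kernel handles 16 pixels per iteration
      int n = width & ~kMask;  // aligned prefix
      int r = width & kMask;   // remainder, 0..15
      if (n > 0) {
        simd_row(src, dst, scale, n);
      }
      if (r > 0) {
        uint16_t temp_src[kMask + 1] = {0};
        uint16_t temp_dst[kMask + 1];
        memcpy(temp_src, src + n, r * sizeof(uint16_t));
        simd_row(temp_src, temp_dst, scale, kMask + 1);  // one full block
        memcpy(dst + n, temp_dst, r * sizeof(uint16_t));
      }
    }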
@@ -3536,59 +3536,41 @@ void SobelYRow_NEON(const uint8_t* src_y0,
 }
 
 // %y passes a float as a scalar vector for vector * scalar multiply.
-// the regoster must be d0 to d15 and indexed with [0] or [1] to access
+// the register must be d0 to d15 and indexed with [0] or [1] to access
 // the float in the first or second float of the d-reg
 
-void HalfFloat1Row_NEON(const uint16_t* src,
-                        uint16_t* dst,
-                        float /*unused*/,
-                        int width) {
-  asm volatile (
-
-      "1: \n"
-      "vld1.8 {q1}, [%0]! \n"  // load 8 shorts
-      "subs %2, %2, #8 \n"  // 8 pixels per loop
-      "vmovl.u16 q2, d2 \n"  // 8 int's
-      "vmovl.u16 q3, d3 \n"
-      "vcvt.f32.u32 q2, q2 \n"  // 8 floats
-      "vcvt.f32.u32 q3, q3 \n"
-      "vmul.f32 q2, q2, %y3 \n"  // adjust exponent
-      "vmul.f32 q3, q3, %y3 \n"
-      "vqshrn.u32 d2, q2, #13 \n"  // isolate halffloat
-      "vqshrn.u32 d3, q3, #13 \n"
-      "vst1.8 {q1}, [%1]! \n"
-      "bgt 1b \n"
-      : "+r"(src),   // %0
-        "+r"(dst),   // %1
-        "+r"(width)  // %2
-      : "w"(1.9259299444e-34f)  // %3
-      : "cc", "memory", "q1", "q2", "q3");
-}
-
 void HalfFloatRow_NEON(const uint16_t* src,
                        uint16_t* dst,
                        float scale,
                        int width) {
-  asm volatile (
+  asm volatile (
 
       "1: \n"
-      "vld1.8 {q1}, [%0]! \n"  // load 8 shorts
-      "subs %2, %2, #8 \n"  // 8 pixels per loop
-      "vmovl.u16 q2, d2 \n"  // 8 int's
-      "vmovl.u16 q3, d3 \n"
-      "vcvt.f32.u32 q2, q2 \n"  // 8 floats
-      "vcvt.f32.u32 q3, q3 \n"
-      "vmul.f32 q2, q2, %y3 \n"  // adjust exponent
-      "vmul.f32 q3, q3, %y3 \n"
-      "vqshrn.u32 d2, q2, #13 \n"  // isolate halffloat
-      "vqshrn.u32 d3, q3, #13 \n"
-      "vst1.8 {q1}, [%1]! \n"
+      "vld1.16 {q0, q1}, [%0]! \n"  // load 16 shorts
+      "subs %2, %2, #16 \n"  // 16 pixels per loop
+      "vmovl.u16 q8, d0 \n"
+      "vmovl.u16 q9, d1 \n"
+      "vmovl.u16 q10, d2 \n"
+      "vmovl.u16 q11, d3 \n"
+      "vcvt.f32.u32 q8, q8 \n"
+      "vcvt.f32.u32 q9, q9 \n"
+      "vcvt.f32.u32 q10, q10 \n"
+      "vcvt.f32.u32 q11, q11 \n"
+      "vmul.f32 q8, q8, %y3 \n"  // adjust exponent
+      "vmul.f32 q9, q9, %y3 \n"
+      "vmul.f32 q10, q10, %y3 \n"
+      "vmul.f32 q11, q11, %y3 \n"
+      "vqshrn.u32 d0, q8, #13 \n"  // isolate halffloat
+      "vqshrn.u32 d1, q9, #13 \n"
+      "vqshrn.u32 d2, q10, #13 \n"
+      "vqshrn.u32 d3, q11, #13 \n"
+      "vst1.16 {q0, q1}, [%1]! \n"  // store 16 fp16
       "bgt 1b \n"
-      : "+r"(src),   // %0
-        "+r"(dst),   // %1
-        "+r"(width)  // %2
+      : "+r"(src),   // %0
+        "+r"(dst),   // %1
+        "+r"(width)  // %2
       : "w"(scale * 1.9259299444e-34f)  // %3
-      : "cc", "memory", "q1", "q2", "q3");
+      : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11");
 }
 
 void ByteToFloatRow_NEON(const uint8_t* src,
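Note: with the new scale guard in HalfFloatPlane, denormal-producing scales on aarch32 stay on the C path, where scalar multiplies honor denormal operands. A sketch of what that fallback computes; libyuv's actual HalfFloatRow_C may differ in detail:

    #include <stdint.h>
    #include <string.h>

    // Scalar fallback sketch mirroring the NEON math above.  Unlike NEON
    // vmul on aarch32, scalar multiplies do not flush the denormal
    // multiplier to zero, so scale = 1/65535 still converts correctly.
    static void HalfFloatRow_C_sketch(const uint16_t* src, uint16_t* dst,
                                      float scale, int width) {
      const float mult = scale * 1.9259299444e-34f;  // scale * 2^-112
      for (int x = 0; x < width; ++x) {
        float value = (float)src[x] * mult;
        uint32_t bits;
        memcpy(&bits, &value, sizeof(bits));
        dst[x] = (uint16_t)(bits >> 13);
      }
    }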
@@ -4664,37 +4664,6 @@ void SobelYRow_NEON(const uint8_t* src_y0,
   );
 }
 
-// Caveat - rounds float to half float whereas scaling version truncates.
-void HalfFloat1Row_NEON(const uint16_t* src,
-                        uint16_t* dst,
-                        float /*unused*/,
-                        int width) {
-  asm volatile(
-      "1: \n"
-      "ldp q0, q1, [%0], #32 \n"  // load 16 shorts
-      "subs %w2, %w2, #16 \n"  // 16 pixels per loop
-      "uxtl v2.4s, v0.4h \n"
-      "uxtl v4.4s, v1.4h \n"
-      "uxtl2 v3.4s, v0.8h \n"
-      "uxtl2 v5.4s, v1.8h \n"
-      "prfm pldl1keep, [%0, 448] \n"
-      "scvtf v2.4s, v2.4s \n"
-      "scvtf v4.4s, v4.4s \n"
-      "scvtf v3.4s, v3.4s \n"
-      "scvtf v5.4s, v5.4s \n"
-      "fcvtn v0.4h, v2.4s \n"
-      "fcvtn v1.4h, v4.4s \n"
-      "fcvtn2 v0.8h, v3.4s \n"
-      "fcvtn2 v1.8h, v5.4s \n"
-      "stp q0, q1, [%1], #32 \n"  // store 16 shorts
-      "b.gt 1b \n"
-      : "+r"(src),   // %0
-        "+r"(dst),   // %1
-        "+r"(width)  // %2
-      :
-      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5");
-}
-
 void HalfFloatRow_NEON(const uint16_t* src,
                        uint16_t* dst,
                        float scale,
@@ -4717,10 +4686,10 @@ void HalfFloatRow_NEON(const uint16_t* src,
       "fmul v3.4s, v3.4s, %3.s[0] \n"
       "fmul v5.4s, v5.4s, %3.s[0] \n"
       "uqshrn v0.4h, v2.4s, #13 \n"  // isolate halffloat
-      "uqshrn v1.4h, v4.4s, #13 \n"  // isolate halffloat
+      "uqshrn v1.4h, v4.4s, #13 \n"
       "uqshrn2 v0.8h, v3.4s, #13 \n"
       "uqshrn2 v1.8h, v5.4s, #13 \n"
-      "stp q0, q1, [%1], #32 \n"  // store 16 shorts
+      "stp q0, q1, [%1], #32 \n"  // store 16 fp16
       "b.gt 1b \n"
       : "+r"(src),  // %0
         "+r"(dst),  // %1
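Note: the deleted aarch64 kernel was a round-to-nearest path (fcvtn), while the surviving scaling kernels truncate; the HalfFloat1Row_Any_F16C path that remains declared above rounds as well. That is why the scale == 1.0f tests accept a diff of 1: for a normal-range value the two narrowings differ by at most one half-float ULP, as this sketch shows (assumes no Inf/NaN/denormal handling):

    #include <stdint.h>

    // Given the float32 bit pattern already multiplied by scale * 2^-112,
    // the scaling kernels truncate:
    static uint16_t HalfBitsTruncate(uint32_t bits) {
      return (uint16_t)(bits >> 13);
    }

    // fcvtn-style round-to-nearest-even differs by at most +1:
    static uint16_t HalfBitsRoundNearest(uint32_t bits) {
      uint16_t h = (uint16_t)(bits >> 13);
      uint32_t rem = bits & 0x1FFF;  // the 13 dropped mantissa bits
      if (rem > 0x1000 || (rem == 0x1000 && (h & 1))) {
        ++h;  // round up past halfway, ties to even
      }
      return h;
    }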
@@ -1551,14 +1551,14 @@ TEST_F(LibYUVPlanarTest, TestAffine) {
 #endif
 }
 
-static int TestCopyPlane(int width,
-                         int height,
+static int TestCopyPlane(int benchmark_width,
+                         int benchmark_height,
                          int benchmark_iterations,
                          int disable_cpu_flags,
                          int benchmark_cpu_info,
                          int invert,
                          int off) {
-  int y_plane_size = width * height;
+  const int y_plane_size = benchmark_width * benchmark_height;
   align_buffer_page_end(orig_y, y_plane_size + off);
   align_buffer_page_end(dst_c, y_plane_size);
   align_buffer_page_end(dst_opt, y_plane_size);
@@ -1570,13 +1570,13 @@ static int TestCopyPlane(int width,
   // Disable all optimizations.
   MaskCpuFlags(disable_cpu_flags);
   for (int i = 0; i < benchmark_iterations; i++) {
-    CopyPlane(orig_y + off, width, dst_c, width, width, height * invert);
+    CopyPlane(orig_y + off, benchmark_width, dst_c, benchmark_width, benchmark_width, benchmark_height * invert);
   }
 
   // Enable optimizations.
   MaskCpuFlags(benchmark_cpu_info);
   for (int i = 0; i < benchmark_iterations; i++) {
-    CopyPlane(orig_y + off, width, dst_opt, width, width, height * invert);
+    CopyPlane(orig_y + off, benchmark_width, dst_opt, benchmark_width, benchmark_width, benchmark_height * invert);
   }
 
   int max_diff = 0;
@@ -2479,36 +2479,37 @@ static int TestHalfFloatPlane(int benchmark_width,
                               int disable_cpu_flags,
                               int benchmark_cpu_info,
                               float scale,
-                              int mask) {
+                              int mask,
+                              int invert,
+                              int off) {
   int i, j;
   const int y_plane_size = benchmark_width * benchmark_height * 2;
-  align_buffer_page_end(orig_y, y_plane_size * 3);
-  uint8_t* dst_opt = orig_y + y_plane_size;
-  uint8_t* dst_c = orig_y + y_plane_size * 2;
-
-  MemRandomize(orig_y, y_plane_size);
-  memset(dst_c, 0, y_plane_size);
-  memset(dst_opt, 1, y_plane_size);
+  align_buffer_page_end(orig_y, y_plane_size + off);
+  align_buffer_page_end(dst_c, y_plane_size);
+  align_buffer_page_end(dst_opt, y_plane_size);
+
+  MemRandomize(orig_y + off, y_plane_size);
+  memset(dst_c, 1, y_plane_size);
+  memset(dst_opt, 2, y_plane_size);
 
   for (i = 0; i < y_plane_size / 2; ++i) {
-    reinterpret_cast<uint16_t*>(orig_y)[i] &= mask;
+    reinterpret_cast<uint16_t*>(orig_y + off)[i] &= mask;
   }
 
   // Disable all optimizations.
   MaskCpuFlags(disable_cpu_flags);
   for (j = 0; j < benchmark_iterations; j++) {
-    HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
+    HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off), benchmark_width * 2,
                    reinterpret_cast<uint16_t*>(dst_c), benchmark_width * 2,
-                   scale, benchmark_width, benchmark_height);
+                   scale, benchmark_width, benchmark_height * invert);
   }
 
   // Enable optimizations.
   MaskCpuFlags(benchmark_cpu_info);
   for (j = 0; j < benchmark_iterations; j++) {
-    HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y), benchmark_width * 2,
+    HalfFloatPlane(reinterpret_cast<uint16_t*>(orig_y + off), benchmark_width * 2,
                    reinterpret_cast<uint16_t*>(dst_opt), benchmark_width * 2,
-                   scale, benchmark_width, benchmark_height);
+                   scale, benchmark_width, benchmark_height * invert);
   }
 
   int max_diff = 0;
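Note: the test now gives orig_y, dst_c and dst_opt separate page-end allocations (plus an off shift for the Unaligned case) instead of carving three regions out of one block, so a row kernel that over-reads or over-writes trips the end of its own buffer. In spirit, a page-end allocation looks like this POSIX sketch; libyuv's align_buffer_page_end macro is implemented differently and does not use a guard page:

    #include <stddef.h>
    #include <stdint.h>
    #include <sys/mman.h>

    // Illustrative only: put the buffer flush against an inaccessible
    // guard page so any overrun faults immediately.
    static uint8_t* AllocPageEnd(size_t size) {
      const size_t kPage = 4096;
      size_t rounded = (size + kPage - 1) & ~(kPage - 1);
      uint8_t* base = (uint8_t*)mmap(NULL, rounded + kPage,
                                     PROT_READ | PROT_WRITE,
                                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (base == MAP_FAILED) {
        return NULL;
      }
      mprotect(base + rounded, kPage, PROT_NONE);  // guard page
      return base + rounded - size;  // buffer ends at the guard page
    }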
@@ -2525,6 +2526,76 @@ static int TestHalfFloatPlane(int benchmark_width,
   return max_diff;
 }
 
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f, 65535, +1, 0);
+  EXPECT_LE(diff, 1);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_Opt) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 4095.0f, 4095, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 511.0f, 511, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Any) {
+  int diff = TestHalfFloatPlane(benchmark_width_ + 1, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Unaligned) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 2);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Invert) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 4096.0f, 4095, -1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 4096.0f, 4095, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
+  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
+                                benchmark_iterations_, disable_cpu_flags_,
+                                benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
+  EXPECT_EQ(0, diff);
+}
+
 #if defined(__arm__)
 static void EnableFlushDenormalToZero(void) {
   uint32_t cw;
@@ -2535,78 +2606,40 @@ static void EnableFlushDenormalToZero(void) {
       : "=r"(cw)
       ::"memory", "cc");  // Clobber List
 }
-#endif
 
+static void DisableFlushDenormalToZero(void) {
+  uint32_t cw;
+  asm volatile (
+      "vmrs %0, fpscr \n"
+      "bic %0, %0, #0x1000000 \n"
+      "vmsr fpscr, %0 \n"
+      : "=r"(cw)
+      ::"memory", "cc");  // Clobber List
+}
+
-// 5 bit exponent with bias of 15 will underflow to a denormal if scale causes
-// exponent to be less than 0.  15 - log2(65536) = -1. This shouldnt normally
-// happen since scale is 1/(1<<bits) where bits is 9, 10 or 12.
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_denormal) {
-  // 32 bit arm rounding on denormal case is off by 1 compared to C.
-#if defined(__arm__)
+// 32 bit arm rounding on denormal case is off by 1 compared to C.
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_flush_denormal) {
   EnableFlushDenormalToZero();
-#endif
   int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f / 65536.0f, 65535);
+                                benchmark_cpu_info_, 1.0f / 65535.0f, 65535, +1, 0);
+  DisableFlushDenormalToZero();
   EXPECT_EQ(0, diff);
 }
 
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_One) {
+TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_flush_denormal) {
+  EnableFlushDenormalToZero();
   int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
                                 benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f, 65535);
-  EXPECT_LE(diff, 1);
-}
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_16bit_Opt) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f / 4096.0f, 65535);
+                                benchmark_cpu_info_, 1.0f / 1023.0f, 1023, +1, 0);
+  DisableFlushDenormalToZero();
   EXPECT_EQ(0, diff);
 }
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_10bit_Opt) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f / 1024.0f, 1023);
-  EXPECT_EQ(0, diff);
-}
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_9bit_Opt) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f / 512.0f, 511);
-  EXPECT_EQ(0, diff);
-}
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Opt) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f / 4096.0f, 4095);
-  EXPECT_EQ(0, diff);
-}
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_Offby1) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f / 4095.0f, 4095);
-  EXPECT_EQ(0, diff);
-}
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_One) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f, 2047);
-  EXPECT_EQ(0, diff);
-}
-
-TEST_F(LibYUVPlanarTest, TestHalfFloatPlane_12bit_One) {
-  int diff = TestHalfFloatPlane(benchmark_width_, benchmark_height_,
-                                benchmark_iterations_, disable_cpu_flags_,
-                                benchmark_cpu_info_, 1.0f, 4095);
-  EXPECT_LE(diff, 1);
-}
+#endif  // defined(__arm__)
 
 static float TestByteToFloat(int benchmark_width,
                              int benchmark_height,