16-bit float support

This commit is contained in:
Daniel Lemire 2024-08-23 21:11:07 -04:00 committed by Anders Dalvander
parent 31cc0d1a0e
commit bfcff49c83
3 changed files with 215 additions and 6 deletions

View File

@ -221,13 +221,19 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
// Type trait: value is true when T is float or double, or one of the
// fixed-width types std::float32_t, std::float64_t, std::float16_t or
// std::bfloat16_t whose corresponding __STDCPP_*_T__ macro is defined.
// NOTE(review): the two ": std::integral_constant<" openings below appear to
// be the before and after forms of the same base clause as rendered by the
// diff view; compilable code keeps only one of them - confirm against the
// real header.
template <typename T>
struct is_supported_float_type
: std::integral_constant<bool, std::is_same<T, float>::value ||
std::is_same<T, double>::value
: std::integral_constant<
bool, std::is_same<T, float>::value || std::is_same<T, double>::value
#ifdef __STDCPP_FLOAT32_T__
|| std::is_same<T, std::float32_t>::value
#endif
#ifdef __STDCPP_FLOAT64_T__
|| std::is_same<T, std::float64_t>::value
#endif
#ifdef __STDCPP_FLOAT16_T__
|| std::is_same<T, std::float16_t>::value
#endif
#ifdef __STDCPP_BFLOAT16_T__
|| std::is_same<T, std::bfloat16_t>::value
#endif
> {
};
@ -622,6 +628,157 @@ inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
return uint64_t(2) << mantissa_explicit_bits();
}
// credit: Jakub Jelínek
#ifdef __STDCPP_FLOAT16_T__
// binary_format support for std::float16_t. This whole section is compiled
// only when the implementation defines __STDCPP_FLOAT16_T__.

// Lookup tables for std::float16_t.
// Each table holds a single placeholder entry instead of being empty: ISO C++
// does not allow an array of unknown bound to be initialized from an empty
// brace list (the result would be a zero-length array, which is a compiler
// extension), and a one-entry table keeps index 0 valid. The fast-path limits
// defined below are all 0, so this specialization advertises no larger index.
// NOTE(review): confirm that no caller indexes these tables beyond entry 0.
template <typename U> struct binary_format_lookup_tables<std::float16_t, U> {
  // 10^0, the only power of ten reachable with a fast-path exponent of 0.
  static constexpr std::float16_t powers_of_ten[] = {1e0f16};
  // Same limit as max_mantissa_fast_path() below.
  static constexpr uint64_t max_mantissa[] = {0};
};

// Out-of-class definitions of the static tables. They are redundant since
// C++17 (constexpr static data members are implicitly inline) and are kept
// unchanged.
template <typename U>
constexpr std::float16_t
    binary_format_lookup_tables<std::float16_t, U>::powers_of_ten[];

template <typename U>
constexpr uint64_t
    binary_format_lookup_tables<std::float16_t, U>::max_mantissa[];

// The accessors below are constexpr and therefore implicitly inline; the
// keyword is spelled out on every specialization for consistency.

// Fast-path limits. All three are 0 in this specialization.
template <>
inline constexpr int binary_format<std::float16_t>::max_exponent_fast_path() {
  return 0;
}

template <>
inline constexpr uint64_t
binary_format<std::float16_t>::max_mantissa_fast_path() {
  return 0;
}

template <>
inline constexpr int binary_format<std::float16_t>::min_exponent_fast_path() {
  return 0;
}

// Number of explicitly stored mantissa bits (IEEE 754 binary16 stores 10).
template <>
inline constexpr int binary_format<std::float16_t>::mantissa_explicit_bits() {
  return 10;
}

// Upper and lower exponent bounds for the round-to-even handling.
// NOTE(review): values taken as provided by the original author; their
// derivation is not visible here.
template <>
inline constexpr int
binary_format<std::float16_t>::max_exponent_round_to_even() {
  return 5;
}

template <>
inline constexpr int
binary_format<std::float16_t>::min_exponent_round_to_even() {
  return -22;
}

// Presumably the negated exponent bias of binary16 (bias 15) - confirm.
template <>
inline constexpr int binary_format<std::float16_t>::minimum_exponent() {
  return -15;
}

// 0x1F is the all-ones pattern of a 5-bit exponent field.
template <>
inline constexpr int binary_format<std::float16_t>::infinite_power() {
  return 0x1F;
}

// Index of the sign bit: the top bit of a 16-bit word.
template <> inline constexpr int binary_format<std::float16_t>::sign_index() {
  return 15;
}

// Decimal exponent limits.
// NOTE(review): 4 is consistent with the largest finite binary16 value
// (65504 < 1e5); the derivation of -27 is not visible here.
template <>
inline constexpr int binary_format<std::float16_t>::largest_power_of_ten() {
  return 4;
}

template <>
inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {
  return -27;
}

// Digit-count limit reported for this format.
template <>
inline constexpr size_t binary_format<std::float16_t>::max_digits() {
  return 22;
}
#endif
// credit: Jakub Jelínek
#ifdef __STDCPP_BFLOAT16_T__
// binary_format support for std::bfloat16_t. This whole section is compiled
// only when the implementation defines __STDCPP_BFLOAT16_T__.

// Lookup tables for std::bfloat16_t.
// Each table holds a single placeholder entry instead of being empty: ISO C++
// does not allow an array of unknown bound to be initialized from an empty
// brace list (the result would be a zero-length array, which is a compiler
// extension), and a one-entry table keeps index 0 valid. The fast-path limits
// defined below are all 0, so this specialization advertises no larger index.
// NOTE(review): confirm that no caller indexes these tables beyond entry 0.
template <typename U> struct binary_format_lookup_tables<std::bfloat16_t, U> {
  // 10^0, the only power of ten reachable with a fast-path exponent of 0.
  static constexpr std::bfloat16_t powers_of_ten[] = {1e0bf16};
  // Same limit as max_mantissa_fast_path() below.
  static constexpr uint64_t max_mantissa[] = {0};
};

// Out-of-class definitions of the static tables. They are redundant since
// C++17 (constexpr static data members are implicitly inline) and are kept
// unchanged.
template <typename U>
constexpr std::bfloat16_t
    binary_format_lookup_tables<std::bfloat16_t, U>::powers_of_ten[];

template <typename U>
constexpr uint64_t
    binary_format_lookup_tables<std::bfloat16_t, U>::max_mantissa[];

// The accessors below are constexpr and therefore implicitly inline; the
// keyword is spelled out on every specialization for consistency.

// Fast-path limits. All three are 0 in this specialization.
template <>
inline constexpr int binary_format<std::bfloat16_t>::max_exponent_fast_path() {
  return 0;
}

template <>
inline constexpr uint64_t
binary_format<std::bfloat16_t>::max_mantissa_fast_path() {
  return 0;
}

template <>
inline constexpr int binary_format<std::bfloat16_t>::min_exponent_fast_path() {
  return 0;
}

// Number of explicitly stored mantissa bits (bfloat16 stores 7).
template <>
inline constexpr int binary_format<std::bfloat16_t>::mantissa_explicit_bits() {
  return 7;
}

// Upper and lower exponent bounds for the round-to-even handling.
// NOTE(review): values taken as provided by the original author; their
// derivation is not visible here.
template <>
inline constexpr int
binary_format<std::bfloat16_t>::max_exponent_round_to_even() {
  return 3;
}

template <>
inline constexpr int
binary_format<std::bfloat16_t>::min_exponent_round_to_even() {
  return -24;
}

// Presumably the negated exponent bias of bfloat16 (bias 127) - confirm.
template <>
inline constexpr int binary_format<std::bfloat16_t>::minimum_exponent() {
  return -127;
}

// 0xFF is the all-ones pattern of an 8-bit exponent field.
template <>
inline constexpr int binary_format<std::bfloat16_t>::infinite_power() {
  return 0xFF;
}

// Index of the sign bit: the top bit of a 16-bit word.
template <> inline constexpr int binary_format<std::bfloat16_t>::sign_index() {
  return 15;
}

// Decimal exponent limits.
// NOTE(review): 38 is consistent with the largest finite bfloat16 value
// (about 3.39e38); the derivation of -60 is not visible here.
template <>
inline constexpr int binary_format<std::bfloat16_t>::largest_power_of_ten() {
  return 38;
}

template <>
inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {
  return -60;
}

// Digit-count limit reported for this format.
template <>
inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {
  return 98;
}
#endif
template <>
inline constexpr uint64_t
binary_format<double>::max_mantissa_fast_path(int64_t power) {

View File

@ -275,6 +275,16 @@ bool check_file(std::string file_name) {
std::string str;
while (std::getline(newfile, str)) {
if (str.size() > 0) {
#ifdef __STDCPP_FLOAT16_T__
// Read 16-bit hex
uint16_t float16;
auto r16 =
std::from_chars(str.data(), str.data() + str.size(), float16, 16);
if (r16.ec != std::errc()) {
std::cerr << "16-bit parsing failure\n";
return false;
}
#endif
// Read 32-bit hex
uint32_t float32;
auto r32 = std::from_chars(str.data() + 5, str.data() + str.size(),
@ -294,6 +304,17 @@ bool check_file(std::string file_name) {
// The string to parse:
char const *number_string = str.data() + 31;
char const *end_of_string = str.data() + str.size();
#ifdef __STDCPP_FLOAT16_T__
// Parse as 16-bit float
std::float16_t parsed_16{};
// auto fast_float_r16 =
fast_float::from_chars(number_string, end_of_string, parsed_16);
// if (fast_float_r16.ec != std::errc() &&
// fast_float_r16.ec != std::errc::result_out_of_range) {
// std::cerr << "16-bit fast_float parsing failure for: " + str +
// "\n"; return false;
// }
#endif
// Parse as 32-bit float
float parsed_32;
auto fast_float_r32 =
@ -313,11 +334,29 @@ bool check_file(std::string file_name) {
return false;
}
// Convert the floats to unsigned ints.
#ifdef __STDCPP_FLOAT16_T__
uint16_t float16_parsed;
#endif
uint32_t float32_parsed;
uint64_t float64_parsed;
#ifdef __STDCPP_FLOAT16_T__
::memcpy(&float16_parsed, &parsed_16, sizeof(parsed_16));
#endif
::memcpy(&float32_parsed, &parsed_32, sizeof(parsed_32));
::memcpy(&float64_parsed, &parsed_64, sizeof(parsed_64));
// Compare with expected results
#ifdef __STDCPP_FLOAT16_T__
if (float16_parsed != float16) {
std::cout << "bad 16 " << str << std::endl;
std::cout << "parsed as " << iHexAndDec(parsed_16) << std::endl;
std::cout << "as raw uint16_t, parsed = " << float16_parsed
<< ", expected = " << float16 << std::endl;
std::cout << "fesetround: " << round_name(d) << std::endl;
fesetround(FE_TONEAREST);
return false;
}
#endif
if (float32_parsed != float32) {
std::cout << "bad 32 " << str << std::endl;
std::cout << "parsed as " << iHexAndDec(parsed_32) << std::endl;

View File

@ -112,7 +112,7 @@ bool large() {
}
int main() {
std::string const input = "3.1416 xyz ";
std::string input = "3.1416 xyz ";
double result;
auto answer =
fast_float::from_chars(input.data(), input.data() + input.size(), result);
@ -121,6 +121,19 @@ int main() {
return EXIT_FAILURE;
}
std::cout << "parsed the number " << result << std::endl;
#ifdef __STDCPP_FLOAT16_T__
  // Smoke test: parse a decimal string into a std::float16_t. Both a clean
  // parse and std::errc::result_out_of_range are accepted; any other error
  // code fails the program.
  std::float16_t parsed_16{};
  input = "10000e-1452";
  auto fast_float_r16 = fast_float::from_chars(
      input.data(), input.data() + input.size(), parsed_16);
  if (fast_float_r16.ec != std::errc() &&
      fast_float_r16.ec != std::errc::result_out_of_range) {
    std::cerr << "16-bit fast_float parsing failure for: " + input + "\n";
    // main() returns int: `return false` would convert to 0 and report
    // success to the caller, hiding the failure.
    return EXIT_FAILURE;
  }
  // Widen to float for printing.
  std::cout << "parsed the 16-bit value " << float(parsed_16) << std::endl;
#endif
if (!small()) {
printf("Bug\n");
return EXIT_FAILURE;