mirror of
https://github.com/fastfloat/fast_float.git
synced 2026-02-09 11:16:45 +08:00
Merge 6bc8902d94959d720d9cf1b2cd99813f8595a76d into 221a4920db7d68d33ab9794af602daef19667351
This commit is contained in:
commit
27cbb3ada4
@ -9,3 +9,4 @@ Jan Pharago
|
||||
Maya Warrier
|
||||
Taha Khokhar
|
||||
Anders Dalvander
|
||||
Elle Solomina
|
||||
|
||||
55
README.md
55
README.md
@ -1,7 +1,14 @@
|
||||
|
||||
## fast_float number parsing library: 4x faster than strtod
|
||||
|
||||
[](https://github.com/fastfloat/fast_float/actions/workflows/ubuntu22.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/ubuntu22.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/ubuntu22-clang.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/ubuntu24.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/ubuntu24-cxx20.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/alpine.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/vs17-ci.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/vs17-cxx20.yml)
|
||||
[](https://github.com/irainman/fast_float/actions/workflows/vs17-clang-ci.yml)
|
||||
[](https://www.codefactor.io/repository/github/irainman/fast_float)
|
||||
|
||||
The fast_float library provides fast header-only implementations for the C++
|
||||
from_chars functions for `float` and `double` types as well as integer types.
|
||||
@ -35,7 +42,7 @@ struct from_chars_result {
|
||||
};
|
||||
```
|
||||
|
||||
It parses the character sequence `[first, last)` for a number. It parses
|
||||
It parses the character sequence `[first, last)` for a number. It parses
|
||||
floating-point numbers expecting a locale-independent format equivalent to the
|
||||
C++17 from_chars function. The resulting floating-point value is the closest
|
||||
floating-point values (using either `float` or `double`), using the "round to
|
||||
@ -48,7 +55,8 @@ parsed value. In case of error, the returned `ec` contains a representative
|
||||
error, otherwise the default (`std::errc()`) value is stored.
|
||||
|
||||
The implementation does not throw and does not allocate memory (e.g., with `new`
|
||||
or `malloc`).
|
||||
or `malloc`) and can be used in kernel, embedded, and other scenarios that
|
||||
rely on such behavior.
|
||||
|
||||
It will parse infinity and nan values.
|
||||
|
||||
@ -291,7 +299,7 @@ int main() {
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced options: using commas as decimal separator, JSON and Fortran
|
||||
## Advanced options: using commas as decimal separator, parse JSON, Fortran and more
|
||||
|
||||
The C++ standard stipulates that `from_chars` has to be locale-independent. In
|
||||
particular, the decimal separator has to be the period (`.`). However, some
|
||||
@ -380,6 +388,42 @@ int main() {
|
||||
}
|
||||
```
|
||||
|
||||
## Additional options to maximize performance and reduce code size (contributed by HedgehogInTheCPP)
|
||||
|
||||
There is a really common use case in mathematical and other abstract syntax tree (AST)-like parsers that already process
|
||||
the sign and all other symbols before any number by themselves. In this case you can use fast_float to parse only positive numbers
|
||||
in all supported formats by defining the macro `FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN`, which significantly reduces the code size
|
||||
and improves performance. You can also define the macro `FASTFLOAT_ISNOT_CHECKED_BOUNDS` if your code already checks bounds;
|
||||
this is very likely, because all parsers need to check the first character themselves before parsing. Additionally, you can define the
|
||||
macro `FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED` if you only need the `FE_TONEAREST` rounding mode when parsing; this option
|
||||
also improves performance a bit and reduces code size. The high-performance example below also uses the [fmt library](https://github.com/fmtlib/fmt), which
|
||||
supports all C++ standards since C++11. Using `string_view` (available since C++17) is also recommended wherever possible,
|
||||
and if you want maximum performance, use the latest compiler and the latest C++ standard with maximum optimization:
|
||||
```
|
||||
-O3 -DNDEBUG + LTO
|
||||
```
|
||||
```C++
|
||||
#define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
#define FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
#define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
#include "fast_float/fast_float.h"
|
||||
#include "fmt/base.h"
|
||||
#include <string_view>
|
||||
|
||||
int main() {
|
||||
std::string_view input = "23.14069263277926900572";
|
||||
double result;
|
||||
auto answer = fast_float::from_chars(input.data(), input.data() + input.size(), result);
|
||||
if ((answer.ec != std::errc()) || ((result != 23.14069263277927 /*properly rounded value */)))
|
||||
{
|
||||
fmt::print(stderr, "parsing failure!\n the number {}.", result);
|
||||
return 1;
|
||||
}
|
||||
fmt::print("parsed the number {}.", result);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## Multiplication of an integer by a power of 10
|
||||
An integer `W` can be multiplied by a power of ten `10^Q` and
|
||||
converted to `double` with correctly rounded value
|
||||
@ -424,7 +468,6 @@ float: 12345678 * 10^23 = 1.23456782e+30 (==expected)
|
||||
Overloads of `fast_float::integer_times_pow10()` are provided for
|
||||
signed and unsigned integer types: `int64_t`, `uint64_t`, etc.
|
||||
|
||||
|
||||
## Users and Related Work
|
||||
|
||||
The fast_float library is part of:
|
||||
|
||||
@ -1,3 +1,8 @@
|
||||
|
||||
// #define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
// #define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
// #define FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
|
||||
#include "counters/bench.h"
|
||||
#include "fast_float/fast_float.h"
|
||||
#include <charconv>
|
||||
|
||||
@ -1,10 +1,13 @@
|
||||
|
||||
// #define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
// #define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
// #define FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
|
||||
#if defined(__linux__) || (__APPLE__ && __aarch64__)
|
||||
#define USING_COUNTERS
|
||||
#endif
|
||||
#include "counters/event_counter.h"
|
||||
#include <algorithm>
|
||||
#include "fast_float/fast_float.h"
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
@ -19,15 +22,17 @@
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <locale.h>
|
||||
|
||||
template <typename CharT>
|
||||
double findmax_fastfloat64(std::vector<std::basic_string<CharT>> &s) {
|
||||
double answer = 0;
|
||||
double x = 0;
|
||||
#include "fast_float/fast_float.h"
|
||||
|
||||
template <typename CharT, typename Value>
|
||||
Value findmax_fastfloat(std::vector<std::basic_string<CharT>> &s) {
|
||||
Value answer = 0;
|
||||
Value x = 0;
|
||||
for (auto &st : s) {
|
||||
auto [p, ec] = fast_float::from_chars(st.data(), st.data() + st.size(), x);
|
||||
|
||||
if (p == st.data()) {
|
||||
throw std::runtime_error("bug in findmax_fastfloat");
|
||||
}
|
||||
@ -36,42 +41,30 @@ double findmax_fastfloat64(std::vector<std::basic_string<CharT>> &s) {
|
||||
return answer;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
double findmax_fastfloat32(std::vector<std::basic_string<CharT>> &s) {
|
||||
float answer = 0;
|
||||
float x = 0;
|
||||
for (auto &st : s) {
|
||||
auto [p, ec] = fast_float::from_chars(st.data(), st.data() + st.size(), x);
|
||||
if (p == st.data()) {
|
||||
throw std::runtime_error("bug in findmax_fastfloat");
|
||||
}
|
||||
answer = answer > x ? answer : x;
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
#ifdef USING_COUNTERS
|
||||
|
||||
counters::event_collector collector{};
|
||||
|
||||
#ifdef USING_COUNTERS
|
||||
template <class T, class CharT>
|
||||
std::vector<counters::event_count>
|
||||
time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
|
||||
size_t repeat) {
|
||||
uint32_t repeat) {
|
||||
std::vector<counters::event_count> aggregate;
|
||||
bool printed_bug = false;
|
||||
for (size_t i = 0; i < repeat; i++) {
|
||||
for (uint32_t i = 0; i != repeat; ++i) {
|
||||
collector.start();
|
||||
double ts = function(lines);
|
||||
auto const ts = function(lines);
|
||||
aggregate.push_back(collector.end());
|
||||
|
||||
if (ts == 0 && !printed_bug) {
|
||||
printf("bug\n");
|
||||
printed_bug = true;
|
||||
}
|
||||
aggregate.push_back(collector.end());
|
||||
}
|
||||
return aggregate;
|
||||
}
|
||||
|
||||
void pretty_print(double volume, size_t number_of_floats, std::string name,
|
||||
void pretty_print(uint64_t volume, size_t number_of_floats, std::string name,
|
||||
std::vector<counters::event_count> events) {
|
||||
double volumeMB = volume / (1024. * 1024.);
|
||||
double average_ns{0};
|
||||
@ -139,25 +132,27 @@ time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
|
||||
double average = 0;
|
||||
double min_value = DBL_MAX;
|
||||
bool printed_bug = false;
|
||||
for (size_t i = 0; i < repeat; i++) {
|
||||
for (size_t i = 0; i != repeat; ++i) {
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
double ts = function(lines);
|
||||
auto const ts = function(lines);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
|
||||
double const dif = static_cast<double>(
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count());
|
||||
average += dif;
|
||||
min_value = min_value < dif ? min_value : dif;
|
||||
|
||||
if (ts == 0 && !printed_bug) {
|
||||
printf("bug\n");
|
||||
printed_bug = true;
|
||||
}
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
double dif =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
|
||||
average += dif;
|
||||
min_value = min_value < dif ? min_value : dif;
|
||||
}
|
||||
average /= repeat;
|
||||
return std::make_pair(min_value, average);
|
||||
}
|
||||
|
||||
void pretty_print(double volume, size_t number_of_floats, std::string name,
|
||||
std::pair<double, double> result) {
|
||||
void pretty_print(uint64_t volume, size_t number_of_floats,
|
||||
std::string const &name, std::pair<double, double> result) {
|
||||
double volumeMB = volume / (1024. * 1024.);
|
||||
printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
|
||||
volumeMB * 1000000000 / result.first,
|
||||
@ -168,10 +163,10 @@ void pretty_print(double volume, size_t number_of_floats, std::string name,
|
||||
#endif
|
||||
|
||||
// this is okay, all chars are ASCII
|
||||
inline std::u16string widen(std::string line) {
|
||||
inline std::u16string widen(std::string const &line) {
|
||||
std::u16string u16line;
|
||||
u16line.resize(line.size());
|
||||
for (size_t i = 0; i < line.size(); ++i) {
|
||||
for (uint32_t i = 0; i != line.size(); ++i) {
|
||||
u16line[i] = char16_t(line[i]);
|
||||
}
|
||||
return u16line;
|
||||
@ -181,28 +176,29 @@ std::vector<std::u16string> widen(const std::vector<std::string> &lines) {
|
||||
std::vector<std::u16string> u16lines;
|
||||
u16lines.reserve(lines.size());
|
||||
for (auto const &line : lines) {
|
||||
u16lines.push_back(widen(line));
|
||||
u16lines.emplace_back(widen(line));
|
||||
}
|
||||
return u16lines;
|
||||
}
|
||||
|
||||
void process(std::vector<std::string> &lines, size_t volume) {
|
||||
size_t repeat = 1000;
|
||||
size_t constexpr repeat = 1000;
|
||||
double volumeMB = volume / (1024. * 1024.);
|
||||
std::cout << "ASCII volume = " << volumeMB << " MB " << std::endl;
|
||||
pretty_print(volume, lines.size(), "fastfloat (64)",
|
||||
time_it_ns(lines, findmax_fastfloat64<char>, repeat));
|
||||
time_it_ns(lines, findmax_fastfloat<char, double>, repeat));
|
||||
pretty_print(volume, lines.size(), "fastfloat (32)",
|
||||
time_it_ns(lines, findmax_fastfloat32<char>, repeat));
|
||||
time_it_ns(lines, findmax_fastfloat<char, float>, repeat));
|
||||
|
||||
std::vector<std::u16string> lines16 = widen(lines);
|
||||
volume = 2 * volume;
|
||||
volumeMB = volume / (1024. * 1024.);
|
||||
std::cout << "UTF-16 volume = " << volumeMB << " MB " << std::endl;
|
||||
pretty_print(volume, lines.size(), "fastfloat (64)",
|
||||
time_it_ns(lines16, findmax_fastfloat64<char16_t>, repeat));
|
||||
pretty_print(
|
||||
volume, lines.size(), "fastfloat (64)",
|
||||
time_it_ns(lines16, findmax_fastfloat<char16_t, double>, repeat));
|
||||
pretty_print(volume, lines.size(), "fastfloat (32)",
|
||||
time_it_ns(lines16, findmax_fastfloat32<char16_t>, repeat));
|
||||
time_it_ns(lines16, findmax_fastfloat<char16_t, float>, repeat));
|
||||
}
|
||||
|
||||
void fileload(std::string filename) {
|
||||
@ -216,17 +212,38 @@ void fileload(std::string filename) {
|
||||
std::cout << "#### " << std::endl;
|
||||
std::string line;
|
||||
std::vector<std::string> lines;
|
||||
lines.reserve(10000); // let us reserve plenty of memory.
|
||||
lines.reserve(120000); // let us reserve plenty of memory.
|
||||
size_t volume = 0;
|
||||
while (getline(inputfile, line)) {
|
||||
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
if (line[0] == '-') {
|
||||
line.erase(0, 1);
|
||||
}
|
||||
#endif
|
||||
volume += line.size();
|
||||
lines.push_back(line);
|
||||
lines.emplace_back(line);
|
||||
}
|
||||
std::cout << "# read " << lines.size() << " lines " << std::endl;
|
||||
process(lines, volume);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
std::cout << "# FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN is enabled"
|
||||
<< std::endl;
|
||||
#endif
|
||||
#ifdef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
|
||||
std::cout << "# FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED is enabled"
|
||||
<< std::endl;
|
||||
#endif
|
||||
#ifdef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
std::cout << "# FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED is enabled"
|
||||
<< std::endl;
|
||||
#endif
|
||||
#ifdef FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
std::cout << "# FASTFLOAT_ISNOT_CHECKED_BOUNDS is enabled" << std::endl;
|
||||
#endif
|
||||
#ifdef USING_COUNTERS
|
||||
if (collector.has_events()) {
|
||||
std::cout << "# Using hardware counters" << std::endl;
|
||||
} else {
|
||||
@ -236,11 +253,14 @@ int main(int argc, char **argv) {
|
||||
<< std::endl;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
if (argc > 1) {
|
||||
fileload(argv[1]);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
fileload(std::string(BENCHMARK_DATA_DIR) + "/canada.txt");
|
||||
fileload(std::string(BENCHMARK_DATA_DIR) + "/canada_short.txt");
|
||||
fileload(std::string(BENCHMARK_DATA_DIR) + "/mesh.txt");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
182
benchmarks/event_counter.h
Normal file
182
benchmarks/event_counter.h
Normal file
@ -0,0 +1,182 @@
|
||||
#ifndef __EVENT_COUNTER_H
|
||||
#define __EVENT_COUNTER_H
|
||||
|
||||
#include <cctype>
|
||||
#ifndef _MSC_VER
|
||||
#include <dirent.h>
|
||||
#endif
|
||||
#include <cinttypes>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <chrono>
|
||||
#include <array>
|
||||
|
||||
#include "linux-perf-events.h"
|
||||
#ifdef __linux__
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
|
||||
#if (defined(__APPLE__) && __APPLE__) && (defined(__aarch64__) && __aarch64__)
|
||||
#include "apple_arm_events.h"
|
||||
#endif
|
||||
|
||||
/// One measurement sample: an elapsed wall-clock duration plus four raw
/// counter readings (cycles, instructions, branches, missed branches).
///
/// Copying and assignment use the implicitly generated members: the type only
/// holds a duration and an array, so hand-written copies add nothing.
struct event_count {
  // The types of counters (so we can read the getter more easily)
  enum event_counter_types {
    CPU_CYCLES = 0,
    INSTRUCTIONS = 1,
    BRANCHES = 2,
    MISSED_BRANCHES = 3,
    event_counter_types_size = 4
  };

  std::chrono::duration<double> elapsed;
  std::array<unsigned long long, event_counter_types_size> event_counts;

  /// Creates a sample with zero elapsed time and all counters at zero.
  event_count() : elapsed(0), event_counts{} {}

  /// Creates a sample from an elapsed duration and a full set of counters,
  /// indexed by event_counter_types.
  event_count(const std::chrono::duration<double> &_elapsed,
              const std::array<unsigned long long, event_counter_types_size>
                  &_event_counts)
      : elapsed(_elapsed), event_counts(_event_counts) {}

  /// Elapsed time in seconds.
  double elapsed_sec() const {
    return std::chrono::duration<double>(elapsed).count();
  }

  /// Elapsed time in nanoseconds.
  double elapsed_ns() const {
    return std::chrono::duration<double, std::nano>(elapsed).count();
  }

  /// Value stored in the CPU_CYCLES slot, as a double.
  double cycles() const {
    return static_cast<double>(event_counts[CPU_CYCLES]);
  }

  /// Value stored in the INSTRUCTIONS slot, as a double.
  double instructions() const {
    return static_cast<double>(event_counts[INSTRUCTIONS]);
  }

  /// Value stored in the BRANCHES slot, as a double.
  double branches() const {
    return static_cast<double>(event_counts[BRANCHES]);
  }

  /// Value stored in the MISSED_BRANCHES slot, as a double.
  double missed_branches() const {
    return static_cast<double>(event_counts[MISSED_BRANCHES]);
  }

  /// Returns the element-wise sum of two samples (durations and counters).
  event_count operator+(const event_count &other) const {
    event_count sum(*this);
    sum += other;
    return sum;
  }

  /// Adds `other` to this sample in place and returns *this so the
  /// expression can be chained like the built-in operator.
  event_count &operator+=(const event_count &other) {
    elapsed += other.elapsed;
    // Loop over the array instead of spelling out indices 0..3 so the code
    // stays correct if event_counter_types_size ever changes.
    for (std::size_t i = 0; i < event_counts.size(); ++i) {
      event_counts[i] += other.event_counts[i];
    }
    return *this;
  }
};
|
||||
|
||||
struct event_aggregate {
|
||||
bool has_events = false;
|
||||
int iterations = 0;
|
||||
event_count total{};
|
||||
event_count best{};
|
||||
event_count worst{};
|
||||
|
||||
event_aggregate() = default;
|
||||
|
||||
void operator<<(const event_count &other) {
|
||||
if (iterations == 0 || other.elapsed < best.elapsed) {
|
||||
best = other;
|
||||
}
|
||||
if (iterations == 0 || other.elapsed > worst.elapsed) {
|
||||
worst = other;
|
||||
}
|
||||
iterations++;
|
||||
total += other;
|
||||
}
|
||||
|
||||
double elapsed_sec() const { return total.elapsed_sec() / iterations; }
|
||||
|
||||
double elapsed_ns() const { return total.elapsed_ns() / iterations; }
|
||||
|
||||
double cycles() const { return total.cycles() / iterations; }
|
||||
|
||||
double instructions() const { return total.instructions() / iterations; }
|
||||
|
||||
double branches() const { return total.branches() / iterations; }
|
||||
|
||||
double missed_branches() const {
|
||||
return total.missed_branches() / iterations;
|
||||
}
|
||||
};
|
||||
|
||||
struct event_collector {
|
||||
event_count count{};
|
||||
std::chrono::time_point<std::chrono::steady_clock> start_clock{};
|
||||
|
||||
#if defined(__linux__)
|
||||
LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
|
||||
|
||||
event_collector()
|
||||
: linux_events(std::array<unsigned long long,
|
||||
4 /*event_counter_types_size*/>{
|
||||
PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS,
|
||||
PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
|
||||
PERF_COUNT_HW_BRANCH_MISSES}) {}
|
||||
|
||||
bool has_events() { return linux_events.is_working(); }
|
||||
#elif __APPLE__ && __aarch64__
|
||||
performance_counters diff;
|
||||
|
||||
event_collector() : diff(0) { setup_performance_counters(); }
|
||||
|
||||
bool has_events() { return setup_performance_counters(); }
|
||||
#else
|
||||
event_collector() = default;
|
||||
|
||||
bool has_events() { return false; }
|
||||
#endif
|
||||
|
||||
inline void start() {
|
||||
#if defined(__linux)
|
||||
linux_events.start();
|
||||
#elif __APPLE__ && __aarch64__
|
||||
if (has_events()) {
|
||||
diff = get_counters();
|
||||
}
|
||||
#endif
|
||||
start_clock = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
inline event_count &end() {
|
||||
const auto end_clock = std::chrono::steady_clock::now();
|
||||
#if defined(__linux)
|
||||
linux_events.end(count.event_counts);
|
||||
#elif __APPLE__ && __aarch64__
|
||||
if (has_events()) {
|
||||
performance_counters end = get_counters();
|
||||
diff = end - diff;
|
||||
}
|
||||
count.event_counts[0] = diff.cycles;
|
||||
count.event_counts[1] = diff.instructions;
|
||||
count.event_counts[2] = diff.branches;
|
||||
count.event_counts[3] = diff.missed_branches;
|
||||
#endif
|
||||
count.elapsed = end_clock - start_clock;
|
||||
return count;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
105
benchmarks/linux-perf-events.h
Normal file
105
benchmarks/linux-perf-events.h
Normal file
@ -0,0 +1,105 @@
|
||||
#pragma once
|
||||
#ifdef __linux__
|
||||
|
||||
#include <asm/unistd.h> // for __NR_perf_event_open
|
||||
#include <linux/perf_event.h> // for perf event constants
|
||||
#include <sys/ioctl.h> // for ioctl
|
||||
#include <unistd.h> // for syscall
|
||||
|
||||
#include <cerrno> // for errno
|
||||
#include <cstring> // for memset
|
||||
#include <stdexcept>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
/// Owns one group of Linux perf events opened with perf_event_open and
/// exposes start()/end() to reset, enable, disable and read them together.
///
/// Any failing system call clears the `working` flag (see is_working());
/// nothing is thrown.
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
  int fd{-1}; // group leader descriptor; -1 means "no event is open"
  bool working{true};
  perf_event_attr attribs{};
  size_t num_events{};
  std::vector<uint64_t> temp_result_vec{};
  std::vector<uint64_t> ids{};
  std::vector<int> event_fds{}; // every descriptor we opened, leader included

public:
  /// Opens one event per entry of `config_vec`. The first event that opens
  /// successfully becomes the group leader; later ones join its group.
  explicit LinuxEvents(std::array<unsigned long long, 4> config_vec) {
    memset(&attribs, 0, sizeof(attribs));
    attribs.type = TYPE;
    attribs.size = sizeof(attribs);
    attribs.disabled = 1;
    attribs.exclude_kernel = 1;
    attribs.exclude_hv = 1;

    attribs.sample_period = 0;
    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    const int pid = 0;  // the current process
    const int cpu = -1; // all CPUs
    const unsigned long flags = 0;

    int group = -1; // no group yet
    num_events = config_vec.size();
    ids.resize(num_events);
    event_fds.reserve(num_events);
    for (size_t i = 0; i < num_events; ++i) {
      attribs.config = config_vec[i];
      const int event_fd = static_cast<int>(
          syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
      if (event_fd == -1) {
        report_error("perf_event_open");
        continue; // nothing to query or to close for this event
      }
      event_fds.push_back(event_fd);
      if (ioctl(event_fd, PERF_EVENT_IOC_ID, &ids[i]) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_ID)");
      }
      if (group == -1) {
        group = event_fd;
        fd = event_fd;
      }
    }

    // Buffer read by end(): slot 0, then one (value, id) pair per event.
    temp_result_vec.resize(num_events * 2 + 1);
  }

  // The object owns file descriptors; a copy would close them twice.
  LinuxEvents(const LinuxEvents &) = delete;
  LinuxEvents &operator=(const LinuxEvents &) = delete;

  /// Closes every descriptor that was opened, not only the group leader.
  ~LinuxEvents() {
    for (const int event_fd : event_fds) {
      close(event_fd);
    }
  }

  /// Resets and enables the whole group. Does nothing when no event opened.
  inline void start() {
    if (fd != -1) {
      if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_RESET)");
      }

      if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
      }
    }
  }

  /// Disables the group, reads it and copies the four values into `results`.
  inline void end(std::array<unsigned long long, 4> &results) {
    if (fd != -1) {
      if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
        report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
      }

      if (read(fd, temp_result_vec.data(),
               temp_result_vec.size() * sizeof(uint64_t)) == -1) {
        report_error("read");
      }
    }
    // our actual results are in slots 1,3,5, ... of this structure
    for (size_t i = 1; i < temp_result_vec.size(); i += 2) {
      results[i / 2] = temp_result_vec[i];
    }
    // the even slots 2,4,6, ... carry the ids; they must match the ones
    // recorded when the events were opened
    for (size_t i = 2; i < temp_result_vec.size(); i += 2) {
      if (ids[i / 2 - 1] != temp_result_vec[i]) {
        report_error("event mismatch");
      }
    }
  }

  /// False once any system call or id check has failed.
  bool is_working() const { return working; }

private:
  // The message is currently unused; an error only clears the flag.
  void report_error(const char *) { working = false; }
};
|
||||
#endif
|
||||
@ -10,17 +10,16 @@
|
||||
|
||||
#include "float_common.h"
|
||||
|
||||
#ifdef FASTFLOAT_SSE2
|
||||
#if defined(FASTFLOAT_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef FASTFLOAT_NEON
|
||||
#elif defined(FASTFLOAT_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
namespace fast_float {
|
||||
|
||||
template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
|
||||
template <typename UC>
|
||||
fastfloat_really_inline constexpr bool has_simd_opt() noexcept {
|
||||
#ifdef FASTFLOAT_HAS_SIMD
|
||||
return std::is_same<UC, char16_t>::value;
|
||||
#else
|
||||
@ -35,32 +34,32 @@ fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
|
||||
return (unsigned)(c - UC('0')) <= 9u;
|
||||
}
|
||||
|
||||
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
|
||||
fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
|
||||
return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 |
|
||||
(val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 |
|
||||
(val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 |
|
||||
(val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56;
|
||||
}
|
||||
|
||||
fastfloat_really_inline constexpr uint32_t byteswap_32(uint32_t val) {
|
||||
fastfloat_really_inline constexpr uint32_t byteswap(uint32_t val) noexcept {
|
||||
return (val >> 24) | ((val >> 8) & 0x0000FF00u) | ((val << 8) & 0x00FF0000u) |
|
||||
(val << 24);
|
||||
}
|
||||
|
||||
// Read 8 UC into a u64. Truncates UC if not char.
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
|
||||
read8_to_u64(UC const *chars) {
|
||||
// Read UCs into an unsigned integer. Truncates UC if not char.
|
||||
template <typename T, typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 T
|
||||
read_chars_to_unsigned(UC const *chars) noexcept {
|
||||
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
|
||||
uint64_t val = 0;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
val |= uint64_t(uint8_t(*chars)) << (i * 8);
|
||||
T val = 0;
|
||||
for (uint_fast8_t i = 0; i != sizeof(T); ++i) {
|
||||
val |= T(uint8_t(*chars)) << (i * 8);
|
||||
++chars;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
uint64_t val;
|
||||
::memcpy(&val, chars, sizeof(uint64_t));
|
||||
T val;
|
||||
::memcpy(&val, chars, sizeof(T));
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
||||
// Need to read as-if the number was in little-endian order.
|
||||
val = byteswap(val);
|
||||
@ -68,39 +67,19 @@ read8_to_u64(UC const *chars) {
|
||||
return val;
|
||||
}
|
||||
|
||||
// Read 4 UC into a u32. Truncates UC if not char.
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
|
||||
read4_to_u32(UC const *chars) {
|
||||
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
|
||||
uint32_t val = 0;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
val |= uint32_t(uint8_t(*chars)) << (i * 8);
|
||||
++chars;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
uint32_t val;
|
||||
::memcpy(&val, chars, sizeof(uint32_t));
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN == 1
|
||||
val = byteswap_32(val);
|
||||
#endif
|
||||
return val;
|
||||
}
|
||||
#ifdef FASTFLOAT_SSE2
|
||||
|
||||
fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
|
||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const &data) {
|
||||
// _mm_packus_epi16 is SSE2+, converts 8×u16 → 8×u8
|
||||
__m128i const packed = _mm_packus_epi16(data, data);
|
||||
#ifdef FASTFLOAT_64BIT
|
||||
return uint64_t(_mm_cvtsi128_si64(packed));
|
||||
return static_cast<uint64_t>(_mm_cvtsi128_si64(packed));
|
||||
#else
|
||||
uint64_t value;
|
||||
// Visual Studio + older versions of GCC don't support _mm_storeu_si64
|
||||
_mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed);
|
||||
return value;
|
||||
#endif
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
}
|
||||
|
||||
fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
|
||||
@ -112,11 +91,9 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
|
||||
|
||||
#elif defined(FASTFLOAT_NEON)
|
||||
|
||||
fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const data) {
|
||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const &data) {
|
||||
uint8x8_t utf8_packed = vmovn_u16(data);
|
||||
return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0);
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
}
|
||||
|
||||
fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
|
||||
@ -141,7 +118,7 @@ uint64_t simd_read8_to_u64(UC const *) {
|
||||
|
||||
// credit @aqrit
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
|
||||
parse_eight_digits_unrolled(uint64_t val) {
|
||||
parse_eight_digits_unrolled(uint64_t val) noexcept {
|
||||
uint64_t const mask = 0x000000FF000000FF;
|
||||
uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
||||
uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
||||
@ -156,7 +133,8 @@ template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
|
||||
parse_eight_digits_unrolled(UC const *chars) noexcept {
|
||||
if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
|
||||
return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
|
||||
return parse_eight_digits_unrolled(
|
||||
read_chars_to_unsigned<uint64_t>(chars)); // truncation okay
|
||||
}
|
||||
return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
|
||||
}
|
||||
@ -193,23 +171,27 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
|
||||
}
|
||||
#ifdef FASTFLOAT_SSE2
|
||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
// Load 8 UTF-16 characters (16 bytes)
|
||||
__m128i const data =
|
||||
_mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
|
||||
// (x - '0') <= 9
|
||||
// Branchless "are all digits?" trick from Lemire:
|
||||
// (x - '0') <= 9 <=> (x + 32720) <= 32729
|
||||
// encoded as signed comparison: (x + 32720) > -32759 ? not digit : digit
|
||||
// http://0x80.pl/articles/simd-parsing-int-sequences.html
|
||||
__m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
|
||||
__m128i const t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
|
||||
__m128i const mask = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
|
||||
|
||||
if (_mm_movemask_epi8(t1) == 0) {
|
||||
// If mask == 0 → all digits valid.
|
||||
if (_mm_movemask_epi8(mask) == 0) {
|
||||
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
}
|
||||
#elif defined(FASTFLOAT_NEON)
|
||||
FASTFLOAT_SIMD_DISABLE_WARNINGS
|
||||
uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
|
||||
// (x - '0') <= 9
|
||||
// http://0x80.pl/articles/simd-parsing-int-sequences.html
|
||||
@ -219,14 +201,12 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
|
||||
if (vminvq_u16(mask) == 0xFFFF) {
|
||||
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
FASTFLOAT_SIMD_RESTORE_WARNINGS
|
||||
}
|
||||
#else
|
||||
(void)chars;
|
||||
(void)i;
|
||||
return false;
|
||||
#endif // FASTFLOAT_SSE2
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // FASTFLOAT_HAS_SIMD
|
||||
@ -260,20 +240,25 @@ loop_parse_if_eight_digits(char const *&p, char const *const pend,
|
||||
uint64_t &i) {
|
||||
// optimizes better than parse_if_eight_digits_unrolled() for UC = char.
|
||||
while ((std::distance(p, pend) >= 8) &&
|
||||
is_made_of_eight_digits_fast(read8_to_u64(p))) {
|
||||
is_made_of_eight_digits_fast(read_chars_to_unsigned<uint64_t>(p))) {
|
||||
i = i * 100000000 +
|
||||
parse_eight_digits_unrolled(read8_to_u64(
|
||||
parse_eight_digits_unrolled(read_chars_to_unsigned<uint64_t>(
|
||||
p)); // in rare cases, this will overflow, but that's ok
|
||||
p += 8;
|
||||
}
|
||||
}
|
||||
|
||||
enum class parse_error {
|
||||
enum class parse_error : uint_fast8_t {
|
||||
no_error,
|
||||
// [JSON-only] The minus sign must be followed by an integer.
|
||||
missing_integer_after_sign,
|
||||
// A sign must be followed by an integer or dot.
|
||||
missing_integer_or_dot_after_sign,
|
||||
// The mantissa must have at least one digit.
|
||||
no_digits_in_mantissa,
|
||||
// Scientific notation requires an exponential part.
|
||||
missing_exponential_part,
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
// [JSON-only] The minus sign must be followed by an integer.
|
||||
missing_integer_after_sign,
|
||||
// [JSON-only] The integer part must not have leading zeros.
|
||||
leading_zeros_in_integer_part,
|
||||
// [JSON-only] The integer part must have at least one digit.
|
||||
@ -281,23 +266,25 @@ enum class parse_error {
|
||||
// [JSON-only] If there is a decimal point, there must be digits in the
|
||||
// fractional part.
|
||||
no_digits_in_fractional_part,
|
||||
// The mantissa must have at least one digit.
|
||||
no_digits_in_mantissa,
|
||||
// Scientific notation requires an exponential part.
|
||||
missing_exponential_part,
|
||||
#endif
|
||||
};
|
||||
|
||||
template <typename UC> struct parsed_number_string_t {
|
||||
int64_t exponent{0};
|
||||
uint64_t mantissa{0};
|
||||
UC const *lastmatch{nullptr};
|
||||
bool negative{false};
|
||||
bool valid{false};
|
||||
bool too_many_digits{false};
|
||||
am_mant_t mantissa;
|
||||
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
bool negative;
|
||||
#endif
|
||||
bool invalid;
|
||||
bool too_many_digits;
|
||||
parse_error error;
|
||||
|
||||
am_pow_t exponent;
|
||||
|
||||
// contains the range of the significant digits
|
||||
span<UC const> integer{}; // non-nullable
|
||||
span<UC const> fraction{}; // nullable
|
||||
parse_error error{parse_error::no_error};
|
||||
span<UC const> integer; // non-nullable
|
||||
span<UC const> fraction; // nullable
|
||||
UC const *lastmatch;
|
||||
};
|
||||
|
||||
using byte_span = span<char const>;
|
||||
@ -305,9 +292,9 @@ using parsed_number_string = parsed_number_string_t<char>;
|
||||
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
|
||||
report_parse_error(UC const *p, parse_error error) {
|
||||
parsed_number_string_t<UC> answer;
|
||||
answer.valid = false;
|
||||
report_parse_error(parsed_number_string_t<UC> &answer, UC const *p,
|
||||
parse_error error) noexcept {
|
||||
answer.invalid = true;
|
||||
answer.lastmatch = p;
|
||||
answer.error = error;
|
||||
return answer;
|
||||
@ -318,125 +305,154 @@ report_parse_error(UC const *p, parse_error error) {
|
||||
template <bool basic_json_fmt, typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
|
||||
parse_number_string(UC const *p, UC const *pend,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
|
||||
UC const decimal_point = options.decimal_point;
|
||||
|
||||
parsed_number_string_t<UC> answer;
|
||||
answer.valid = false;
|
||||
answer.too_many_digits = false;
|
||||
// assume p < pend, so dereference without checks;
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
parsed_number_string_t<UC> answer{};
|
||||
// so dereference without checks
|
||||
FASTFLOAT_ASSUME(p < pend);
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
answer.negative = (*p == UC('-'));
|
||||
// C++17 20.19.3.(7.1) explicitly forbids '+' sign here
|
||||
if ((*p == UC('-')) || (uint64_t(fmt & chars_format::allow_leading_plus) &&
|
||||
!basic_json_fmt && *p == UC('+'))) {
|
||||
if (answer.negative ||
|
||||
((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
|
||||
(!basic_json_fmt && *p == UC('+')))) {
|
||||
++p;
|
||||
if (p == pend) {
|
||||
return report_parse_error<UC>(
|
||||
p, parse_error::missing_integer_or_dot_after_sign);
|
||||
answer, p, parse_error::missing_integer_or_dot_after_sign);
|
||||
}
|
||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||
if (!is_integer(*p)) { // a sign must be followed by an integer
|
||||
return report_parse_error<UC>(p,
|
||||
// a sign must be followed by an integer
|
||||
if (!is_integer(*p)) {
|
||||
return report_parse_error<UC>(answer, p,
|
||||
parse_error::missing_integer_after_sign);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!is_integer(*p) &&
|
||||
(*p !=
|
||||
decimal_point)) { // a sign must be followed by an integer or the dot
|
||||
// a sign must be followed by an integer or the dot
|
||||
if (!is_integer(*p) && (*p != options.decimal_point)) {
|
||||
return report_parse_error<UC>(
|
||||
p, parse_error::missing_integer_or_dot_after_sign);
|
||||
answer, p, parse_error::missing_integer_or_dot_after_sign);
|
||||
}
|
||||
}
|
||||
}
|
||||
UC const *const start_digits = p;
|
||||
#endif
|
||||
|
||||
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
|
||||
auto const *const start_digits = p;
|
||||
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
// a multiplication by 10 is cheaper than an arbitrary integer
|
||||
// multiplication
|
||||
i = 10 * i +
|
||||
uint64_t(*p -
|
||||
UC('0')); // might overflow, we will handle the overflow later
|
||||
answer.mantissa = static_cast<fast_float::am_mant_t>(
|
||||
answer.mantissa * 10 +
|
||||
static_cast<uint8_t>(
|
||||
*p - UC('0'))); // might overflow, we will handle the overflow later
|
||||
++p;
|
||||
}
|
||||
UC const *const end_of_integer_part = p;
|
||||
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
|
||||
answer.integer = span<UC const>(start_digits, size_t(digit_count));
|
||||
|
||||
auto const *const end_of_integer_part = p;
|
||||
auto digit_count = static_cast<am_digits>(end_of_integer_part - start_digits);
|
||||
answer.integer = span<UC const>(start_digits, digit_count);
|
||||
// We have now parsed the integer part of the mantissa.
|
||||
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||
// at least 1 digit in integer part, without leading zeros
|
||||
if (digit_count == 0) {
|
||||
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
|
||||
return report_parse_error<UC>(answer, p,
|
||||
parse_error::no_digits_in_integer_part);
|
||||
}
|
||||
if ((start_digits[0] == UC('0') && digit_count > 1)) {
|
||||
return report_parse_error<UC>(start_digits,
|
||||
return report_parse_error<UC>(answer, start_digits,
|
||||
parse_error::leading_zeros_in_integer_part);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
int64_t exponent = 0;
|
||||
bool const has_decimal_point = (p != pend) && (*p == decimal_point);
|
||||
if (has_decimal_point) {
|
||||
// We can now parse the fraction part of the mantissa.
|
||||
if ((p != pend) && (*p == options.decimal_point)) {
|
||||
++p;
|
||||
UC const *before = p;
|
||||
auto const *const before = p;
|
||||
// can occur at most twice without overflowing, but let it occur more, since
|
||||
// for integers with many digits, digit parsing is the primary bottleneck.
|
||||
loop_parse_if_eight_digits(p, pend, i);
|
||||
loop_parse_if_eight_digits(p, pend, answer.mantissa);
|
||||
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
uint8_t digit = uint8_t(*p - UC('0'));
|
||||
++p;
|
||||
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
||||
}
|
||||
exponent = before - p;
|
||||
answer.fraction = span<UC const>(before, size_t(p - before));
|
||||
digit_count -= exponent;
|
||||
}
|
||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||
// at least 1 digit in fractional part
|
||||
if (has_decimal_point && exponent == 0) {
|
||||
return report_parse_error<UC>(p,
|
||||
parse_error::no_digits_in_fractional_part);
|
||||
}
|
||||
}
|
||||
else if (digit_count == 0) { // we must have encountered at least one integer!
|
||||
return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
|
||||
}
|
||||
int64_t exp_number = 0; // explicit exponential part
|
||||
if ((uint64_t(fmt & chars_format::scientific) && (p != pend) &&
|
||||
((UC('e') == *p) || (UC('E') == *p))) ||
|
||||
(uint64_t(fmt & detail::basic_fortran_fmt) && (p != pend) &&
|
||||
((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
|
||||
(UC('D') == *p)))) {
|
||||
UC const *location_of_e = p;
|
||||
if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
|
||||
(UC('D') == *p)) {
|
||||
auto const digit = uint8_t(*p - UC('0'));
|
||||
answer.mantissa = static_cast<fast_float::am_mant_t>(
|
||||
answer.mantissa * 10 +
|
||||
digit); // in rare cases, this will overflow, but that's ok
|
||||
++p;
|
||||
}
|
||||
answer.exponent = static_cast<am_pow_t>(before - p);
|
||||
answer.fraction =
|
||||
span<UC const>(before, static_cast<am_digits>(p - before));
|
||||
digit_count -= static_cast<am_digits>(answer.exponent);
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
|
||||
// at least 1 digit in fractional part
|
||||
if (answer.exponent == 0) {
|
||||
return report_parse_error<UC>(
|
||||
answer, p, parse_error::no_digits_in_fractional_part);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else if (digit_count == 0) {
|
||||
// We must have encountered at least one integer!
|
||||
return report_parse_error<UC>(answer, p,
|
||||
parse_error::no_digits_in_mantissa);
|
||||
}
|
||||
// We have now parsed the integer and the fraction part of the mantissa.
|
||||
|
||||
// Now we can parse the explicit exponential part.
|
||||
am_pow_t exp_number = 0; // explicit exponential part
|
||||
if ((p != pend) &&
|
||||
((chars_format_t(options.format & chars_format::scientific) &&
|
||||
(UC('e') == *p || UC('E') == *p))
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
|| (chars_format_t(options.format & detail::basic_fortran_fmt) &&
|
||||
((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
|
||||
(UC('D') == *p)))
|
||||
#endif
|
||||
)) {
|
||||
auto const *location_of_e = p;
|
||||
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
++p;
|
||||
#else
|
||||
if ((UC('e') == *p) || (UC('E') == *p)
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
|| (UC('d') == *p) || (UC('D') == *p)
|
||||
#endif
|
||||
) {
|
||||
++p;
|
||||
}
|
||||
#endif
|
||||
bool neg_exp = false;
|
||||
if ((p != pend) && (UC('-') == *p)) {
|
||||
neg_exp = true;
|
||||
++p;
|
||||
} else if ((p != pend) &&
|
||||
(UC('+') ==
|
||||
*p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
|
||||
++p;
|
||||
if (p != pend) {
|
||||
if (UC('-') == *p) {
|
||||
neg_exp = true;
|
||||
++p;
|
||||
} else if (UC('+') == *p) {
|
||||
// '+' on exponent is allowed by C++17 20.19.3.(7.1)
|
||||
++p;
|
||||
}
|
||||
}
|
||||
// We have now parsed the sign of the exponent.
|
||||
if ((p == pend) || !is_integer(*p)) {
|
||||
if (!uint64_t(fmt & chars_format::fixed)) {
|
||||
// The exponential part is invalid for scientific notation, so it must
|
||||
// be a trailing token for fixed notation. However, fixed notation is
|
||||
// disabled, so report a scientific notation error.
|
||||
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
||||
if (!(chars_format_t(options.format & chars_format::fixed))) {
|
||||
// The exponential part is invalid for scientific notation, so it
|
||||
// must be a trailing token for fixed notation. However, fixed
|
||||
// notation is disabled, so report a scientific notation error.
|
||||
return report_parse_error<UC>(answer, p,
|
||||
parse_error::missing_exponential_part);
|
||||
}
|
||||
// Otherwise, we will be ignoring the 'e'.
|
||||
p = location_of_e;
|
||||
} else {
|
||||
// Now let's parse the explicit exponent.
|
||||
while ((p != pend) && is_integer(*p)) {
|
||||
uint8_t digit = uint8_t(*p - UC('0'));
|
||||
if (exp_number < 0x10000000) {
|
||||
if (exp_number < am_bias_limit) {
|
||||
// check for exponent overflow if we have too many digits.
|
||||
auto const digit = uint8_t(*p - UC('0'));
|
||||
exp_number = 10 * exp_number + digit;
|
||||
}
|
||||
++p;
|
||||
@ -444,17 +460,21 @@ parse_number_string(UC const *p, UC const *pend,
|
||||
if (neg_exp) {
|
||||
exp_number = -exp_number;
|
||||
}
|
||||
exponent += exp_number;
|
||||
answer.exponent += exp_number;
|
||||
}
|
||||
} else {
|
||||
// If it scientific and not fixed, we have to bail out.
|
||||
if (uint64_t(fmt & chars_format::scientific) &&
|
||||
!uint64_t(fmt & chars_format::fixed)) {
|
||||
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
|
||||
if ((chars_format_t(options.format & chars_format::scientific)) &&
|
||||
!(chars_format_t(options.format & chars_format::fixed))) {
|
||||
return report_parse_error<UC>(answer, p,
|
||||
parse_error::missing_exponential_part);
|
||||
}
|
||||
}
|
||||
|
||||
// We parsed all parts of the number, let's save progress.
|
||||
answer.lastmatch = p;
|
||||
answer.valid = true;
|
||||
|
||||
// Now we can check for errors.
|
||||
|
||||
// If we frequently had to deal with long strings of digits,
|
||||
// we could extend our code by using a 128-bit integer instead
|
||||
@ -466,58 +486,64 @@ parse_number_string(UC const *p, UC const *pend,
|
||||
// We have to handle the case where we have 0.0000somenumber.
|
||||
// We need to be mindful of the case where we only have zeroes...
|
||||
// E.g., 0.000000000...000.
|
||||
UC const *start = start_digits;
|
||||
while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
|
||||
auto const *start = start_digits;
|
||||
while ((start != pend) &&
|
||||
(*start == UC('0') || *start == options.decimal_point)) {
|
||||
if (*start == UC('0')) {
|
||||
digit_count--;
|
||||
--digit_count;
|
||||
}
|
||||
start++;
|
||||
++start;
|
||||
}
|
||||
|
||||
// We have to check if number has more than 19 significant digits.
|
||||
if (digit_count > 19) {
|
||||
answer.too_many_digits = true;
|
||||
// Let us start again, this time, avoiding overflows.
|
||||
// We don't need to call if is_integer, since we use the
|
||||
// pre-tokenized spans from above.
|
||||
i = 0;
|
||||
answer.mantissa = 0;
|
||||
p = answer.integer.ptr;
|
||||
UC const *int_end = p + answer.integer.len();
|
||||
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
|
||||
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
|
||||
i = i * 10 + uint64_t(*p - UC('0'));
|
||||
constexpr am_mant_t minimal_nineteen_digit_integer{1000000000000000000};
|
||||
while ((p != int_end) &&
|
||||
(answer.mantissa < minimal_nineteen_digit_integer)) {
|
||||
answer.mantissa =
|
||||
answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0'));
|
||||
++p;
|
||||
}
|
||||
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
|
||||
exponent = end_of_integer_part - p + exp_number;
|
||||
} else { // We have a value with a fractional component.
|
||||
if (answer.mantissa >= minimal_nineteen_digit_integer) {
|
||||
// We have a big integers, so skip the fraction part completely.
|
||||
answer.exponent = am_pow_t(end_of_integer_part - p) + exp_number;
|
||||
} else {
|
||||
// We have a value with a significant fractional component.
|
||||
p = answer.fraction.ptr;
|
||||
UC const *frac_end = p + answer.fraction.len();
|
||||
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
|
||||
i = i * 10 + uint64_t(*p - UC('0'));
|
||||
UC const *const frac_end = p + answer.fraction.len();
|
||||
while ((p != frac_end) &&
|
||||
(answer.mantissa < minimal_nineteen_digit_integer)) {
|
||||
answer.mantissa = static_cast<am_mant_t>(
|
||||
answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
|
||||
++p;
|
||||
}
|
||||
exponent = answer.fraction.ptr - p + exp_number;
|
||||
answer.exponent = am_pow_t(answer.fraction.ptr - p) + exp_number;
|
||||
}
|
||||
// We have now corrected both exponent and i, to a truncated value
|
||||
// We now corrected both exponent and mantissa, to a truncated value
|
||||
}
|
||||
}
|
||||
answer.exponent = exponent;
|
||||
answer.mantissa = i;
|
||||
|
||||
return answer;
|
||||
}
|
||||
|
||||
template <typename T, typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
parse_options_t<UC> options) {
|
||||
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
|
||||
int const base = options.base;
|
||||
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
from_chars_result_t<UC> answer;
|
||||
|
||||
UC const *const first = p;
|
||||
auto const *const first = p;
|
||||
|
||||
bool const negative = (*p == UC('-'));
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
// Read sign
|
||||
auto const negative = (*p == UC('-'));
|
||||
#ifdef FASTFLOAT_VISUAL_STUDIO
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4127)
|
||||
@ -530,24 +556,27 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
answer.ptr = first;
|
||||
return answer;
|
||||
}
|
||||
if ((*p == UC('-')) ||
|
||||
(uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) {
|
||||
if (negative ||
|
||||
((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
|
||||
(*p == UC('+')))) {
|
||||
++p;
|
||||
}
|
||||
#endif
|
||||
|
||||
UC const *const start_num = p;
|
||||
auto const *const start_num = p;
|
||||
|
||||
// Skip leading zeros
|
||||
while (p != pend && *p == UC('0')) {
|
||||
++p;
|
||||
}
|
||||
|
||||
bool const has_leading_zeros = p > start_num;
|
||||
auto const has_leading_zeros = p > start_num;
|
||||
|
||||
UC const *const start_digits = p;
|
||||
auto const *const start_digits = p;
|
||||
|
||||
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value)) {
|
||||
if (base == 10) {
|
||||
const size_t len = (size_t)(pend - p);
|
||||
if (options.base == 10) {
|
||||
auto const len = static_cast<am_digits>(pend - p);
|
||||
if (len == 0) {
|
||||
if (has_leading_zeros) {
|
||||
value = 0;
|
||||
@ -562,53 +591,39 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
|
||||
uint32_t digits;
|
||||
|
||||
#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST
|
||||
if (std::is_constant_evaluated()) {
|
||||
uint8_t str[4]{};
|
||||
for (size_t j = 0; j < 4 && j < len; ++j) {
|
||||
str[j] = static_cast<uint8_t>(p[j]);
|
||||
}
|
||||
digits = std::bit_cast<uint32_t>(str);
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN
|
||||
digits = byteswap_32(digits);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
if (false) {
|
||||
}
|
||||
#endif
|
||||
else if (len >= 4) {
|
||||
::memcpy(&digits, p, 4);
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN
|
||||
digits = byteswap_32(digits);
|
||||
#endif
|
||||
if (len >= sizeof(uint32_t)) {
|
||||
digits = read_chars_to_unsigned<uint32_t>(p);
|
||||
} else {
|
||||
uint32_t b0 = static_cast<uint8_t>(p[0]);
|
||||
uint32_t b1 = (len > 1) ? static_cast<uint8_t>(p[1]) : 0xFFu;
|
||||
uint32_t b2 = (len > 2) ? static_cast<uint8_t>(p[2]) : 0xFFu;
|
||||
uint32_t b3 = 0xFFu;
|
||||
uint32_t const b0 = static_cast<uint8_t>(p[0]);
|
||||
uint32_t const b1 = (len > 1) ? static_cast<uint8_t>(p[1]) : 0x00u;
|
||||
uint32_t const b2 = (len > 2) ? static_cast<uint8_t>(p[2]) : 0x00u;
|
||||
uint32_t const b3 = 0x00u;
|
||||
digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
|
||||
}
|
||||
#if FASTFLOAT_IS_BIG_ENDIAN
|
||||
digits = byteswap(digits);
|
||||
#endif
|
||||
|
||||
uint32_t magic =
|
||||
uint32_t const magic =
|
||||
((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
|
||||
uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32
|
||||
uint32_t nd = (tz == 32) ? 4 : (tz >> 3);
|
||||
nd = (uint32_t)std::min((size_t)nd, len);
|
||||
auto const tz = countr_zero_32(magic); // 7, 15, 23, 31, or 32
|
||||
auto nd = static_cast<am_digits>(tz >> 3);
|
||||
nd = std::min(nd, len);
|
||||
if (nd == 0) {
|
||||
if (has_leading_zeros) {
|
||||
value = 0;
|
||||
answer.ec = std::errc();
|
||||
answer.ptr = p;
|
||||
return answer;
|
||||
} else {
|
||||
answer.ec = std::errc::invalid_argument;
|
||||
answer.ptr = first;
|
||||
}
|
||||
answer.ec = std::errc::invalid_argument;
|
||||
answer.ptr = first;
|
||||
return answer;
|
||||
}
|
||||
if (nd > 3) {
|
||||
const UC *q = p + nd;
|
||||
size_t rem = len - nd;
|
||||
auto rem = len - nd;
|
||||
while (rem) {
|
||||
if (*q < UC('0') || *q > UC('9'))
|
||||
break;
|
||||
@ -623,14 +638,15 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
digits ^= 0x30303030u;
|
||||
digits <<= ((4 - nd) * 8);
|
||||
|
||||
uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) |
|
||||
((digits << 8) & 0xff0000);
|
||||
uint32_t const check = ((digits >> 24) & 0xff) |
|
||||
((digits >> 8) & 0xff00) |
|
||||
((digits << 8) & 0xff0000);
|
||||
if (check > 0x00020505) {
|
||||
answer.ec = std::errc::result_out_of_range;
|
||||
answer.ptr = p + nd;
|
||||
return answer;
|
||||
}
|
||||
value = (uint8_t)((0x640a01 * digits) >> 24);
|
||||
value = static_cast<uint8_t>((0x640a01 * digits) >> 24);
|
||||
answer.ec = std::errc();
|
||||
answer.ptr = p + nd;
|
||||
return answer;
|
||||
@ -638,8 +654,8 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
}
|
||||
|
||||
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value)) {
|
||||
if (base == 10) {
|
||||
const size_t len = size_t(pend - p);
|
||||
if (options.base == 10) {
|
||||
const auto len = static_cast<am_digits>(pend - p);
|
||||
if (len == 0) {
|
||||
if (has_leading_zeros) {
|
||||
value = 0;
|
||||
@ -652,22 +668,22 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
return answer;
|
||||
}
|
||||
|
||||
if (len >= 4) {
|
||||
uint32_t digits = read4_to_u32(p);
|
||||
if (len >= sizeof(uint32_t)) {
|
||||
auto const digits = read_chars_to_unsigned<uint32_t>(p);
|
||||
if (is_made_of_four_digits_fast(digits)) {
|
||||
uint32_t v = parse_four_digits_unrolled(digits);
|
||||
auto v = parse_four_digits_unrolled(digits);
|
||||
if (len >= 5 && is_integer(p[4])) {
|
||||
v = v * 10 + uint32_t(p[4] - '0');
|
||||
v = v * 10 + static_cast<uint32_t>(p[4] - '0');
|
||||
if (len >= 6 && is_integer(p[5])) {
|
||||
answer.ec = std::errc::result_out_of_range;
|
||||
const UC *q = p + 5;
|
||||
const auto *q = p + 5;
|
||||
while (q != pend && is_integer(*q)) {
|
||||
q++;
|
||||
++q;
|
||||
}
|
||||
answer.ptr = q;
|
||||
return answer;
|
||||
}
|
||||
if (v > 65535) {
|
||||
if (v > std::numeric_limits<uint16_t>::max()) {
|
||||
answer.ec = std::errc::result_out_of_range;
|
||||
answer.ptr = p + 5;
|
||||
return answer;
|
||||
@ -687,20 +703,21 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t i = 0;
|
||||
if (base == 10) {
|
||||
// Parse digits
|
||||
am_mant_t i = 0;
|
||||
if (options.base == 10) {
|
||||
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
|
||||
}
|
||||
while (p != pend) {
|
||||
uint8_t digit = ch_to_digit(*p);
|
||||
if (digit >= base) {
|
||||
auto const digit = ch_to_digit(*p);
|
||||
if (digit >= options.base) {
|
||||
break;
|
||||
}
|
||||
i = uint64_t(base) * i + digit; // might overflow, check this later
|
||||
p++;
|
||||
i = am_mant_t(options.base) * i + digit; // might overflow, check this later
|
||||
++p;
|
||||
}
|
||||
|
||||
size_t digit_count = size_t(p - start_digits);
|
||||
auto const digit_count = static_cast<am_digits>(p - start_digits);
|
||||
|
||||
if (digit_count == 0) {
|
||||
if (has_leading_zeros) {
|
||||
@ -717,30 +734,38 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
answer.ptr = p;
|
||||
|
||||
// check u64 overflow
|
||||
size_t max_digits = max_digits_u64(base);
|
||||
auto const max_digits = max_digits_u64(options.base);
|
||||
if (digit_count > max_digits) {
|
||||
answer.ec = std::errc::result_out_of_range;
|
||||
return answer;
|
||||
}
|
||||
// this check can be eliminated for all other types, but they will all require
|
||||
// a max_digits(base) equivalent
|
||||
if (digit_count == max_digits && i < min_safe_u64(base)) {
|
||||
if (digit_count == max_digits && i < min_safe_u64(options.base)) {
|
||||
answer.ec = std::errc::result_out_of_range;
|
||||
return answer;
|
||||
}
|
||||
|
||||
// check other types overflow
|
||||
if (!std::is_same<T, uint64_t>::value) {
|
||||
if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) {
|
||||
if (!std::is_same<T, am_mant_t>::value) {
|
||||
if (i > am_mant_t(std::numeric_limits<T>::max())
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
+ uint8_t(negative)
|
||||
#endif
|
||||
) {
|
||||
answer.ec = std::errc::result_out_of_range;
|
||||
return answer;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
value = T(i);
|
||||
#else
|
||||
if (negative) {
|
||||
#ifdef FASTFLOAT_VISUAL_STUDIO
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4146)
|
||||
#pragma warning(disable : 4804)
|
||||
#endif
|
||||
// this weird workaround is required because:
|
||||
// - converting unsigned to signed when its value is greater than signed max
|
||||
@ -749,13 +774,14 @@ parse_int_string(UC const *p, UC const *pend, T &value,
|
||||
// this is always optimized into a neg instruction (note: T is an integer
|
||||
// type)
|
||||
value = T(-std::numeric_limits<T>::max() -
|
||||
T(i - uint64_t(std::numeric_limits<T>::max())));
|
||||
T(i - am_mant_t(std::numeric_limits<T>::max())));
|
||||
#ifdef FASTFLOAT_VISUAL_STUDIO
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
} else {
|
||||
value = T(i);
|
||||
}
|
||||
#endif
|
||||
|
||||
answer.ec = std::errc();
|
||||
return answer;
|
||||
|
||||
@ -19,11 +19,11 @@ namespace fast_float {
|
||||
#if defined(FASTFLOAT_64BIT) && !defined(__sparc)
|
||||
#define FASTFLOAT_64BIT_LIMB 1
|
||||
typedef uint64_t limb;
|
||||
constexpr size_t limb_bits = 64;
|
||||
constexpr limb_t limb_bits = 64;
|
||||
#else
|
||||
#define FASTFLOAT_32BIT_LIMB
|
||||
typedef uint32_t limb;
|
||||
constexpr size_t limb_bits = 32;
|
||||
constexpr limb_t limb_bits = 32;
|
||||
#endif
|
||||
|
||||
typedef span<limb> limb_span;
|
||||
@ -32,59 +32,58 @@ typedef span<limb> limb_span;
|
||||
// of bits required to store the largest bigint, which is
|
||||
// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
|
||||
// ~3600 bits, so we round to 4000.
|
||||
constexpr size_t bigint_bits = 4000;
|
||||
constexpr size_t bigint_limbs = bigint_bits / limb_bits;
|
||||
typedef uint_fast16_t bigint_bits_t;
|
||||
constexpr bigint_bits_t bigint_bits = 4000;
|
||||
constexpr limb_t bigint_limbs = bigint_bits / limb_bits;
|
||||
|
||||
// vector-like type that is allocated on the stack. the entire
|
||||
// buffer is pre-allocated, and only the length changes.
|
||||
template <uint16_t size> struct stackvec {
|
||||
template <limb_t size> struct stackvec {
|
||||
limb data[size];
|
||||
// we never need more than 150 limbs
|
||||
uint16_t length{0};
|
||||
limb_t length{0};
|
||||
|
||||
stackvec() = default;
|
||||
FASTFLOAT_CONSTEXPR20 stackvec() noexcept = default;
|
||||
stackvec(stackvec const &) = delete;
|
||||
stackvec &operator=(stackvec const &) = delete;
|
||||
stackvec(stackvec &&) = delete;
|
||||
stackvec &operator=(stackvec &&other) = delete;
|
||||
|
||||
// create stack vector from existing limb span.
|
||||
FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
|
||||
FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) noexcept {
|
||||
FASTFLOAT_ASSERT(try_extend(s));
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 limb &operator[](limb_t index) noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
return data[index];
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 const limb &operator[](limb_t index) const noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
return data[index];
|
||||
}
|
||||
|
||||
// index from the end of the container
|
||||
FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 const limb &rindex(limb_t index) const noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(index < length);
|
||||
size_t rindex = length - index - 1;
|
||||
auto rindex = length - index - 1;
|
||||
return data[rindex];
|
||||
}
|
||||
|
||||
// set the length, without bounds checking.
|
||||
FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
|
||||
length = uint16_t(len);
|
||||
}
|
||||
FASTFLOAT_CONSTEXPR14 void set_len(limb_t len) noexcept { length = len; }
|
||||
|
||||
constexpr size_t len() const noexcept { return length; }
|
||||
constexpr limb_t len() const noexcept { return length; }
|
||||
|
||||
constexpr bool is_empty() const noexcept { return length == 0; }
|
||||
|
||||
constexpr size_t capacity() const noexcept { return size; }
|
||||
constexpr limb_t capacity() const noexcept { return size; }
|
||||
|
||||
// append item to vector, without bounds checking
|
||||
FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
|
||||
data[length] = value;
|
||||
length++;
|
||||
++length;
|
||||
}
|
||||
|
||||
// append item to vector, returning if item was added
|
||||
@ -101,7 +100,7 @@ template <uint16_t size> struct stackvec {
|
||||
FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
|
||||
limb *ptr = data + length;
|
||||
std::copy_n(s.ptr, s.len(), ptr);
|
||||
set_len(len() + s.len());
|
||||
set_len(len() + static_cast<limb_t>(s.len()));
|
||||
}
|
||||
|
||||
// try to add items to the vector, returning if items were added
|
||||
@ -118,9 +117,9 @@ template <uint16_t size> struct stackvec {
|
||||
// if the new size is longer than the vector, assign value to each
|
||||
// appended item.
|
||||
FASTFLOAT_CONSTEXPR20
|
||||
void resize_unchecked(size_t new_len, limb value) noexcept {
|
||||
void resize_unchecked(limb_t new_len, limb value) noexcept {
|
||||
if (new_len > len()) {
|
||||
size_t count = new_len - len();
|
||||
auto count = new_len - len();
|
||||
limb *first = data + len();
|
||||
limb *last = first + count;
|
||||
::std::fill(first, last, value);
|
||||
@ -131,7 +130,7 @@ template <uint16_t size> struct stackvec {
|
||||
}
|
||||
|
||||
// try to resize the vector, returning if the vector was resized.
|
||||
FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool try_resize(limb_t new_len, limb value) noexcept {
|
||||
if (new_len > capacity()) {
|
||||
return false;
|
||||
} else {
|
||||
@ -143,12 +142,12 @@ template <uint16_t size> struct stackvec {
|
||||
// check if any limbs are non-zero after the given index.
|
||||
// this needs to be done in reverse order, since the index
|
||||
// is relative to the most significant limbs.
|
||||
FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
|
||||
FASTFLOAT_CONSTEXPR14 bool nonzero(limb_t index) const noexcept {
|
||||
while (index < len()) {
|
||||
if (rindex(index) != 0) {
|
||||
return true;
|
||||
}
|
||||
index++;
|
||||
++index;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -156,7 +155,7 @@ template <uint16_t size> struct stackvec {
|
||||
// normalize the big integer, so most-significant zero limbs are removed.
|
||||
FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
|
||||
while (len() > 0 && rindex(0) == 0) {
|
||||
length--;
|
||||
--length;
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -170,18 +169,18 @@ empty_hi64(bool &truncated) noexcept {
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
|
||||
uint64_hi64(uint64_t r0, bool &truncated) noexcept {
|
||||
truncated = false;
|
||||
int shl = leading_zeroes(r0);
|
||||
auto shl = leading_zeroes(r0);
|
||||
return r0 << shl;
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
|
||||
uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept {
|
||||
int shl = leading_zeroes(r0);
|
||||
auto shl = leading_zeroes(r0);
|
||||
if (shl == 0) {
|
||||
truncated = r1 != 0;
|
||||
return r0;
|
||||
} else {
|
||||
int shr = 64 - shl;
|
||||
limb_t shr = 64 - shl;
|
||||
truncated = (r1 << shl) != 0;
|
||||
return (r0 << shl) | (r1 >> shr);
|
||||
}
|
||||
@ -258,16 +257,14 @@ scalar_mul(limb x, limb y, limb &carry) noexcept {
|
||||
|
||||
// add scalar value to bigint starting from offset.
|
||||
// used in grade school multiplication
|
||||
template <uint16_t size>
|
||||
inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
|
||||
size_t start) noexcept {
|
||||
size_t index = start;
|
||||
limb carry = y;
|
||||
template <limb_t size>
|
||||
inline FASTFLOAT_CONSTEXPR20 bool
|
||||
small_add_from(stackvec<size> &vec, limb carry, limb_t start) noexcept {
|
||||
bool overflow;
|
||||
while (carry != 0 && index < vec.len()) {
|
||||
vec[index] = scalar_add(vec[index], carry, overflow);
|
||||
while (carry != 0 && start < vec.len()) {
|
||||
vec[start] = scalar_add(vec[start], carry, overflow);
|
||||
carry = limb(overflow);
|
||||
index += 1;
|
||||
++start;
|
||||
}
|
||||
if (carry != 0) {
|
||||
FASTFLOAT_TRY(vec.try_push(carry));
|
||||
@ -276,18 +273,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
|
||||
}
|
||||
|
||||
// add scalar value to bigint.
|
||||
template <uint16_t size>
|
||||
template <limb_t size>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
|
||||
small_add(stackvec<size> &vec, limb y) noexcept {
|
||||
return small_add_from(vec, y, 0);
|
||||
}
|
||||
|
||||
// multiply bigint by scalar value.
|
||||
template <uint16_t size>
|
||||
template <limb_t size>
|
||||
inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
|
||||
limb y) noexcept {
|
||||
limb carry = 0;
|
||||
for (size_t index = 0; index < vec.len(); index++) {
|
||||
for (limb_t index = 0; index != vec.len(); ++index) {
|
||||
vec[index] = scalar_mul(vec[index], y, carry);
|
||||
}
|
||||
if (carry != 0) {
|
||||
@ -298,17 +295,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
|
||||
|
||||
// add bigint to bigint starting from index.
|
||||
// used in grade school multiplication
|
||||
template <uint16_t size>
|
||||
template <limb_t size>
|
||||
FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
|
||||
size_t start) noexcept {
|
||||
limb_t start) noexcept {
|
||||
// the effective x buffer is from `start..x.len()`, so exit early
|
||||
// if we can't get that current range.
|
||||
if (x.len() < start || y.len() > x.len() - start) {
|
||||
FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
|
||||
if (x.len() < start ||
|
||||
y.len() > static_cast<uint_fast16_t>(x.len() - start)) {
|
||||
FASTFLOAT_TRY(x.try_resize(static_cast<limb_t>(y.len()) + start, 0));
|
||||
}
|
||||
|
||||
bool carry = false;
|
||||
for (size_t index = 0; index < y.len(); index++) {
|
||||
for (limb_t index = 0; index != y.len(); ++index) {
|
||||
limb xi = x[index + start];
|
||||
limb yi = y[index];
|
||||
bool c1 = false;
|
||||
@ -323,20 +321,20 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
|
||||
|
||||
// handle overflow
|
||||
if (carry) {
|
||||
FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
|
||||
FASTFLOAT_TRY(small_add_from(x, 1, static_cast<limb_t>(y.len()) + start));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// add bigint to bigint.
|
||||
template <uint16_t size>
|
||||
template <limb_t size>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
|
||||
large_add_from(stackvec<size> &x, limb_span y) noexcept {
|
||||
return large_add_from(x, y, 0);
|
||||
}
|
||||
|
||||
// grade-school multiplication algorithm
|
||||
template <uint16_t size>
|
||||
template <limb_t size>
|
||||
FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
limb_span xs = limb_span(x.data, x.len());
|
||||
stackvec<size> z(xs);
|
||||
@ -345,7 +343,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
if (y.len() != 0) {
|
||||
limb y0 = y[0];
|
||||
FASTFLOAT_TRY(small_mul(x, y0));
|
||||
for (size_t index = 1; index < y.len(); index++) {
|
||||
for (limb_t index = 1; index < y.len(); ++index) {
|
||||
limb yi = y[index];
|
||||
stackvec<size> zi;
|
||||
if (yi != 0) {
|
||||
@ -364,7 +362,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
}
|
||||
|
||||
// grade-school multiplication algorithm
|
||||
template <uint16_t size>
|
||||
template <limb_t size>
|
||||
FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
if (y.len() == 1) {
|
||||
FASTFLOAT_TRY(small_mul(x, y[0]));
|
||||
@ -375,7 +373,7 @@ FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
|
||||
}
|
||||
|
||||
template <typename = void> struct pow5_tables {
|
||||
static constexpr uint32_t large_step = 135;
|
||||
static constexpr limb_t large_step = 135;
|
||||
static constexpr uint64_t small_power_of_5[] = {
|
||||
1UL,
|
||||
5UL,
|
||||
@ -419,7 +417,7 @@ template <typename = void> struct pow5_tables {
|
||||
|
||||
#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
|
||||
|
||||
template <typename T> constexpr uint32_t pow5_tables<T>::large_step;
|
||||
template <typename T> constexpr limb_t pow5_tables<T>::large_step;
|
||||
|
||||
template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];
|
||||
|
||||
@ -435,14 +433,14 @@ struct bigint : pow5_tables<> {
|
||||
// storage of the limbs, in little-endian order.
|
||||
stackvec<bigint_limbs> vec;
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 bigint() : vec() {}
|
||||
FASTFLOAT_CONSTEXPR20 bigint() noexcept : vec() {}
|
||||
|
||||
bigint(bigint const &) = delete;
|
||||
bigint &operator=(bigint const &) = delete;
|
||||
bigint(bigint &&) = delete;
|
||||
bigint &operator=(bigint &&other) = delete;
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() {
|
||||
FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) noexcept : vec() {
|
||||
#ifdef FASTFLOAT_64BIT_LIMB
|
||||
vec.push_unchecked(value);
|
||||
#else
|
||||
@ -493,7 +491,7 @@ struct bigint : pow5_tables<> {
|
||||
} else if (vec.len() < other.vec.len()) {
|
||||
return -1;
|
||||
} else {
|
||||
for (size_t index = vec.len(); index > 0; index--) {
|
||||
for (limb_t index = vec.len(); index > 0; --index) {
|
||||
limb xi = vec[index - 1];
|
||||
limb yi = other.vec[index - 1];
|
||||
if (xi > yi) {
|
||||
@ -508,7 +506,7 @@ struct bigint : pow5_tables<> {
|
||||
|
||||
// shift left each limb n bits, carrying over to the new limb
|
||||
// returns true if we were able to shift all the digits.
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_bits(limb_t n) noexcept {
|
||||
// Internally, for each item, we shift left by n, and add the previous
|
||||
// right shifted limb-bits.
|
||||
// For example, we transform (for u8) shifted left 2, to:
|
||||
@ -517,10 +515,10 @@ struct bigint : pow5_tables<> {
|
||||
FASTFLOAT_DEBUG_ASSERT(n != 0);
|
||||
FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);
|
||||
|
||||
size_t shl = n;
|
||||
size_t shr = limb_bits - shl;
|
||||
limb_t const shl = n;
|
||||
limb_t const shr = limb_bits - shl;
|
||||
limb prev = 0;
|
||||
for (size_t index = 0; index < vec.len(); index++) {
|
||||
for (limb_t index = 0; index != vec.len(); ++index) {
|
||||
limb xi = vec[index];
|
||||
vec[index] = (xi << shl) | (prev >> shr);
|
||||
prev = xi;
|
||||
@ -534,9 +532,10 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// move the limbs left by `n` limbs.
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool shl_limbs(limb_t n) noexcept {
|
||||
FASTFLOAT_DEBUG_ASSERT(n != 0);
|
||||
if (n + vec.len() > vec.capacity()) {
|
||||
// we can't shift more than the capacity of the vector.
|
||||
return false;
|
||||
} else if (!vec.is_empty()) {
|
||||
// move limbs
|
||||
@ -555,9 +554,9 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// move the limbs left by `n` bits.
|
||||
FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
|
||||
size_t rem = n % limb_bits;
|
||||
size_t div = n / limb_bits;
|
||||
FASTFLOAT_CONSTEXPR20 bool shl(bigint_bits_t n) noexcept {
|
||||
auto const rem = static_cast<limb_t>(n % limb_bits);
|
||||
auto const div = static_cast<limb_t>(n / limb_bits);
|
||||
if (rem != 0) {
|
||||
FASTFLOAT_TRY(shl_bits(rem));
|
||||
}
|
||||
@ -568,8 +567,9 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// get the number of leading zeros in the bigint.
|
||||
FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bigint_bits_t ctlz() const noexcept {
|
||||
if (vec.is_empty()) {
|
||||
// empty vector, no bits, no zeros.
|
||||
return 0;
|
||||
} else {
|
||||
#ifdef FASTFLOAT_64BIT_LIMB
|
||||
@ -583,9 +583,9 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// get the number of bits in the bigint.
|
||||
FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
|
||||
int lz = ctlz();
|
||||
return int(limb_bits * vec.len()) - lz;
|
||||
FASTFLOAT_CONSTEXPR20 bigint_bits_t bit_length() const noexcept {
|
||||
auto lz = ctlz();
|
||||
return limb_bits * vec.len() - lz;
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }
|
||||
@ -593,23 +593,27 @@ struct bigint : pow5_tables<> {
|
||||
FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }
|
||||
|
||||
// multiply as if by 2 raised to a power.
|
||||
FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }
|
||||
FASTFLOAT_CONSTEXPR20 bool pow2(am_pow_t const exp) noexcept {
|
||||
FASTFLOAT_ASSERT(exp >= 0);
|
||||
return shl(static_cast<fast_float::bigint_bits_t>(exp));
|
||||
}
|
||||
|
||||
// multiply as if by 5 raised to a power.
|
||||
FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool pow5(am_pow_t exp) noexcept {
|
||||
FASTFLOAT_ASSERT(exp >= 0);
|
||||
// multiply by a power of 5
|
||||
size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
|
||||
limb_span large = limb_span(large_power_of_5, large_length);
|
||||
limb_t const large_length = sizeof(large_power_of_5) / sizeof(limb);
|
||||
limb_span const large = limb_span(large_power_of_5, large_length);
|
||||
while (exp >= large_step) {
|
||||
FASTFLOAT_TRY(large_mul(vec, large));
|
||||
exp -= large_step;
|
||||
}
|
||||
#ifdef FASTFLOAT_64BIT_LIMB
|
||||
uint32_t small_step = 27;
|
||||
limb max_native = 7450580596923828125UL;
|
||||
limb_t constexpr small_step = 27;
|
||||
limb constexpr max_native = 7450580596923828125UL;
|
||||
#else
|
||||
uint32_t small_step = 13;
|
||||
limb max_native = 1220703125U;
|
||||
limb_t constexpr small_step = 13;
|
||||
limb constexpr max_native = 1220703125U;
|
||||
#endif
|
||||
while (exp >= small_step) {
|
||||
FASTFLOAT_TRY(small_mul(vec, max_native));
|
||||
@ -627,7 +631,8 @@ struct bigint : pow5_tables<> {
|
||||
}
|
||||
|
||||
// multiply as if by 10 raised to a power.
|
||||
FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
|
||||
FASTFLOAT_CONSTEXPR20 bool pow10(am_pow_t exp) noexcept {
|
||||
FASTFLOAT_ASSERT(exp >= 0);
|
||||
FASTFLOAT_TRY(pow5(exp));
|
||||
return pow2(exp);
|
||||
}
|
||||
|
||||
@ -7,13 +7,30 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Testing for https://wg21.link/N3652, adopted in C++14
|
||||
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
|
||||
// C++14 constexpr
|
||||
#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
|
||||
#define FASTFLOAT_CONSTEXPR14 constexpr
|
||||
#elif __cplusplus >= 201402L
|
||||
#define FASTFLOAT_CONSTEXPR14 constexpr
|
||||
#elif defined(_MSC_VER) && _MSC_VER >= 1910 && _MSVC_LANG >= 201402L
|
||||
#define FASTFLOAT_CONSTEXPR14 constexpr
|
||||
#else
|
||||
#define FASTFLOAT_CONSTEXPR14
|
||||
#endif
|
||||
|
||||
// C++14 variable templates
|
||||
#if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304L
|
||||
#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 1
|
||||
#elif __cplusplus >= 201402L
|
||||
#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 1
|
||||
#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918L && \
|
||||
_MSVC_LANG >= 201402L
|
||||
#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 1
|
||||
#else
|
||||
#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 0
|
||||
#endif
|
||||
|
||||
// C++20 std::bit_cast
|
||||
#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
|
||||
#define FASTFLOAT_HAS_BIT_CAST 1
|
||||
#else
|
||||
@ -23,16 +40,42 @@
|
||||
#if defined(__cpp_lib_is_constant_evaluated) && \
|
||||
__cpp_lib_is_constant_evaluated >= 201811L
|
||||
#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1
|
||||
#define FASTFLOAT_CONSTEVAL consteval
|
||||
#else
|
||||
#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0
|
||||
#define FASTFLOAT_CONSTEVAL FASTFLOAT_CONSTEXPR14
|
||||
#endif
|
||||
|
||||
#if defined(__cpp_lib_byteswap)
|
||||
#define FASTFLOAT_HAS_BYTESWAP 1
|
||||
#else
|
||||
#define FASTFLOAT_HAS_BYTESWAP 0
|
||||
#endif
|
||||
|
||||
// C++17 if constexpr
|
||||
#if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L
|
||||
#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
|
||||
#elif defined(__cpp_constexpr) && __cpp_constexpr >= 201603L
|
||||
#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
|
||||
#elif __cplusplus >= 201703L
|
||||
#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
|
||||
#elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L
|
||||
#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
|
||||
#else
|
||||
#define FASTFLOAT_IF_CONSTEXPR17(x) if (x)
|
||||
#endif
|
||||
|
||||
// C++17 inline variables
|
||||
#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
|
||||
#define FASTFLOAT_INLINE_VARIABLE inline constexpr
|
||||
#elif __cplusplus >= 201703L
|
||||
#define FASTFLOAT_INLINE_VARIABLE inline constexpr
|
||||
#elif defined(_MSC_VER) && _MSC_VER >= 1912 && _MSVC_LANG >= 201703L
|
||||
#define FASTFLOAT_INLINE_VARIABLE inline constexpr
|
||||
#else
|
||||
#define FASTFLOAT_INLINE_VARIABLE static constexpr
|
||||
#endif
|
||||
|
||||
// Testing for relevant C++20 constexpr library features
|
||||
#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST && \
|
||||
defined(__cpp_lib_constexpr_algorithms) && \
|
||||
@ -50,4 +93,17 @@
|
||||
#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1
|
||||
#endif
|
||||
|
||||
#if defined(__has_builtin)
|
||||
#define FASTFLOAT_HAS_BUILTIN(x) __has_builtin(x)
|
||||
#else
|
||||
#define FASTFLOAT_HAS_BUILTIN(x) false
|
||||
#endif
|
||||
|
||||
// Support for the [[assume]] attribute, as specified in P1774
|
||||
#if defined(__cpp_attrubute_assume)
|
||||
#define FASTFLOAT_ASSUME(expr) [[assume(expr)]]
|
||||
#else
|
||||
#define FASTFLOAT_ASSUME(expr)
|
||||
#endif
|
||||
|
||||
#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
|
||||
|
||||
@ -17,30 +17,32 @@ namespace fast_float {
|
||||
// most significant bits and the low part corresponding to the least significant
|
||||
// bits.
|
||||
//
|
||||
template <int bit_precision>
|
||||
template <am_bits_t bit_precision>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
|
||||
compute_product_approximation(int64_t q, uint64_t w) {
|
||||
int const index = 2 * int(q - powers::smallest_power_of_five);
|
||||
compute_product_approximation(am_pow_t q, am_mant_t w) noexcept {
|
||||
am_pow_t const index = 2 * (q - powers::smallest_power_of_five);
|
||||
// For small values of q, e.g., q in [0,27], the answer is always exact
|
||||
// because the line value128 firstproduct = full_multiplication(w,
|
||||
// power_of_five_128[index]); gives the exact answer.
|
||||
value128 firstproduct =
|
||||
full_multiplication(w, powers::power_of_five_128[index]);
|
||||
static_assert((bit_precision >= 0) && (bit_precision <= 64),
|
||||
" precision should be in (0,64]");
|
||||
" precision should be in [0,64]");
|
||||
constexpr uint64_t precision_mask =
|
||||
(bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
|
||||
: uint64_t(0xFFFFFFFFFFFFFFFF);
|
||||
|
||||
if ((firstproduct.high & precision_mask) ==
|
||||
precision_mask) { // could further guard with (lower + w < lower)
|
||||
// regarding the second product, we only need secondproduct.high, but our
|
||||
// expectation is that the compiler will optimize this extra work away if
|
||||
// needed.
|
||||
value128 secondproduct =
|
||||
value128 const secondproduct =
|
||||
full_multiplication(w, powers::power_of_five_128[index + 1]);
|
||||
firstproduct.low += secondproduct.high;
|
||||
|
||||
if (secondproduct.high > firstproduct.low) {
|
||||
firstproduct.high++;
|
||||
++firstproduct.high;
|
||||
}
|
||||
}
|
||||
return firstproduct;
|
||||
@ -62,7 +64,7 @@ namespace detail {
|
||||
* where
|
||||
* p = log(5**-q)/log(2) = -q * log(5)/log(2)
|
||||
*/
|
||||
constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
|
||||
constexpr fastfloat_really_inline am_pow_t power(am_pow_t const q) noexcept {
|
||||
return (((152170 + 65536) * q) >> 16) + 63;
|
||||
}
|
||||
} // namespace detail
|
||||
@ -71,13 +73,13 @@ constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
|
||||
// for significant digits already multiplied by 10 ** q.
|
||||
template <typename binary>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa
|
||||
compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
|
||||
int hilz = int(w >> 63) ^ 1;
|
||||
compute_error_scaled(am_pow_t q, am_mant_t w, limb_t lz) noexcept {
|
||||
auto const hilz = static_cast<am_bits_t>((w >> 63) ^ 1);
|
||||
adjusted_mantissa answer;
|
||||
answer.mantissa = w << hilz;
|
||||
int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
|
||||
answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
|
||||
invalid_am_bias);
|
||||
constexpr am_pow_t bias =
|
||||
binary::mantissa_explicit_bits() - binary::minimum_exponent();
|
||||
answer.power2 = detail::power(q) + bias - hilz - lz - 62 + invalid_am_bias;
|
||||
return answer;
|
||||
}
|
||||
|
||||
@ -85,10 +87,10 @@ compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
|
||||
// the power2 in the exponent will be adjusted by invalid_am_bias.
|
||||
template <typename binary>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
compute_error(int64_t q, uint64_t w) noexcept {
|
||||
int lz = leading_zeroes(w);
|
||||
compute_error(am_pow_t q, am_mant_t w) noexcept {
|
||||
auto const lz = leading_zeroes(w);
|
||||
w <<= lz;
|
||||
value128 product =
|
||||
value128 const product =
|
||||
compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
|
||||
return compute_error_scaled<binary>(q, product.high, lz);
|
||||
}
|
||||
@ -100,12 +102,12 @@ compute_error(int64_t q, uint64_t w) noexcept {
|
||||
// should recompute in such cases.
|
||||
template <typename binary>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
compute_float(int64_t q, uint64_t w) noexcept {
|
||||
compute_float(am_pow_t q, am_mant_t w) noexcept {
|
||||
adjusted_mantissa answer;
|
||||
if ((w == 0) || (q < binary::smallest_power_of_ten())) {
|
||||
// we want to get zero:
|
||||
answer.power2 = 0;
|
||||
answer.mantissa = 0;
|
||||
// result should be zero
|
||||
return answer;
|
||||
}
|
||||
if (q > binary::largest_power_of_ten()) {
|
||||
@ -114,11 +116,12 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
answer.mantissa = 0;
|
||||
return answer;
|
||||
}
|
||||
|
||||
// At this point in time q is in [powers::smallest_power_of_five,
|
||||
// powers::largest_power_of_five].
|
||||
|
||||
// We want the most significant bit of w to be 1. Shift if needed.
|
||||
int lz = leading_zeroes(w);
|
||||
auto const lz = leading_zeroes(w);
|
||||
w <<= lz;
|
||||
|
||||
// The required precision is binary::mantissa_explicit_bits() + 3 because
|
||||
@ -127,7 +130,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
// 3. We might lose a bit due to the "upperbit" routine (result too small,
|
||||
// requiring a shift)
|
||||
|
||||
value128 product =
|
||||
value128 const product =
|
||||
compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
|
||||
// The computed 'product' is always sufficient.
|
||||
// Mathematical proof:
|
||||
@ -138,14 +141,18 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
// branchless approach: value128 product = compute_product(q, w); but in
|
||||
// practice, we can win big with the compute_product_approximation if its
|
||||
// additional branch is easily predicted. Which is best is data specific.
|
||||
int upperbit = int(product.high >> 63);
|
||||
int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3;
|
||||
auto const upperbit = static_cast<am_bits_t>(product.high >> 63);
|
||||
auto const shift = static_cast<am_bits_t>(
|
||||
upperbit + 64 - binary::mantissa_explicit_bits() - 3);
|
||||
|
||||
// Shift right the mantissa to the correct position
|
||||
answer.mantissa = product.high >> shift;
|
||||
|
||||
answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz -
|
||||
binary::minimum_exponent());
|
||||
if (answer.power2 <= 0) { // we have a subnormal?
|
||||
answer.power2 = detail::power(q) + upperbit - lz - binary::minimum_exponent();
|
||||
|
||||
// Now, we need to round the mantissa correctly.
|
||||
|
||||
if (answer.power2 <= 0) { // we have a subnormal or very small value.
|
||||
// Here we have that answer.power2 <= 0, so -answer.power2 >= 0
|
||||
if (-answer.power2 + 1 >=
|
||||
64) { // if we have more than 64 bits below the minimum exponent, you
|
||||
@ -155,6 +162,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
// result should be zero
|
||||
return answer;
|
||||
}
|
||||
// We have a subnormal number. We need to shift the mantissa to the right
|
||||
// next line is safe because -answer.power2 + 1 < 64
|
||||
answer.mantissa >>= -answer.power2 + 1;
|
||||
// Thankfully, we can't have both "round-to-even" and subnormals because
|
||||
@ -170,7 +178,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
// subnormal, but we can only know this after rounding.
|
||||
// So we only declare a subnormal if we are smaller than the threshold.
|
||||
answer.power2 =
|
||||
(answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits()))
|
||||
(answer.mantissa < (am_mant_t(1) << binary::mantissa_explicit_bits()))
|
||||
? 0
|
||||
: 1;
|
||||
return answer;
|
||||
@ -188,22 +196,25 @@ compute_float(int64_t q, uint64_t w) noexcept {
|
||||
// ... we dropped out only zeroes. But if this happened, then we can go
|
||||
// back!!!
|
||||
if ((answer.mantissa << shift) == product.high) {
|
||||
answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up
|
||||
answer.mantissa &= ~am_mant_t(1); // flip it so that we do not round up
|
||||
}
|
||||
}
|
||||
|
||||
// Normal rounding
|
||||
answer.mantissa += (answer.mantissa & 1); // round up
|
||||
answer.mantissa >>= 1;
|
||||
if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
|
||||
answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
|
||||
answer.power2++; // undo previous addition
|
||||
if (answer.mantissa >= (am_mant_t(2) << binary::mantissa_explicit_bits())) {
|
||||
answer.mantissa = (am_mant_t(1) << binary::mantissa_explicit_bits());
|
||||
++answer.power2; // undo previous line addition
|
||||
}
|
||||
|
||||
answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
|
||||
// Check if we have infinity after computation
|
||||
answer.mantissa &= ~(am_mant_t(1) << binary::mantissa_explicit_bits());
|
||||
if (answer.power2 >= binary::infinite_power()) { // infinity
|
||||
answer.power2 = binary::infinite_power();
|
||||
answer.mantissa = 0;
|
||||
}
|
||||
|
||||
return answer;
|
||||
}
|
||||
|
||||
|
||||
@ -38,8 +38,8 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
|
||||
// this algorithm is not even close to optimized, but it has no practical
|
||||
// effect on performance: in order to have a faster algorithm, we'd need
|
||||
// to slow down performance for faster algorithms, and this is still fast.
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
|
||||
scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept {
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 am_pow_t
|
||||
scientific_exponent(am_mant_t mantissa, am_pow_t exponent) noexcept {
|
||||
while (mantissa >= 10000) {
|
||||
mantissa /= 10000;
|
||||
exponent += 4;
|
||||
@ -58,29 +58,26 @@ scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept {
|
||||
// this converts a native floating-point number to an extended-precision float.
|
||||
template <typename T>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
to_extended(T value) noexcept {
|
||||
to_extended(T const value) noexcept {
|
||||
using equiv_uint = equiv_uint_t<T>;
|
||||
constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
|
||||
constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
|
||||
constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
|
||||
|
||||
adjusted_mantissa am;
|
||||
int32_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
binary_format<T>::minimum_exponent();
|
||||
equiv_uint bits;
|
||||
#if FASTFLOAT_HAS_BIT_CAST
|
||||
bits = std::bit_cast<equiv_uint>(value);
|
||||
#else
|
||||
::memcpy(&bits, &value, sizeof(T));
|
||||
#endif
|
||||
constexpr am_pow_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
binary_format<T>::minimum_exponent();
|
||||
|
||||
auto const bits = bit_cast<equiv_uint>(value);
|
||||
|
||||
if ((bits & exponent_mask) == 0) {
|
||||
// denormal
|
||||
am.power2 = 1 - bias;
|
||||
am.mantissa = bits & mantissa_mask;
|
||||
} else {
|
||||
// normal
|
||||
am.power2 = int32_t((bits & exponent_mask) >>
|
||||
binary_format<T>::mantissa_explicit_bits());
|
||||
am.power2 = static_cast<am_pow_t>(
|
||||
(bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
|
||||
am.power2 -= bias;
|
||||
am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
|
||||
}
|
||||
@ -93,7 +90,7 @@ to_extended(T value) noexcept {
|
||||
// halfway between b and b+u.
|
||||
template <typename T>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
to_extended_halfway(T value) noexcept {
|
||||
to_extended_halfway(T const value) noexcept {
|
||||
adjusted_mantissa am = to_extended(value);
|
||||
am.mantissa <<= 1;
|
||||
am.mantissa += 1;
|
||||
@ -105,14 +102,15 @@ to_extended_halfway(T value) noexcept {
|
||||
template <typename T, typename callback>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
|
||||
callback cb) noexcept {
|
||||
int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
|
||||
constexpr am_pow_t mantissa_shift =
|
||||
64 - binary_format<T>::mantissa_explicit_bits() - 1;
|
||||
if (-am.power2 >= mantissa_shift) {
|
||||
// have a denormal float
|
||||
int32_t shift = -am.power2 + 1;
|
||||
cb(am, std::min<int32_t>(shift, 64));
|
||||
am_pow_t shift = -am.power2 + 1;
|
||||
cb(am, std::min<am_pow_t>(shift, 64));
|
||||
// check for round-up: if rounding-nearest carried us to the hidden bit.
|
||||
am.power2 = (am.mantissa <
|
||||
(uint64_t(1) << binary_format<T>::mantissa_explicit_bits()))
|
||||
(am_mant_t(1) << binary_format<T>::mantissa_explicit_bits()))
|
||||
? 0
|
||||
: 1;
|
||||
return;
|
||||
@ -123,13 +121,13 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
|
||||
|
||||
// check for carry
|
||||
if (am.mantissa >=
|
||||
(uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
|
||||
am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||
am.power2++;
|
||||
(am_mant_t(2) << binary_format<T>::mantissa_explicit_bits())) {
|
||||
am.mantissa = (am_mant_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||
++am.power2;
|
||||
}
|
||||
|
||||
// check for infinite: we could have carried to an infinite power
|
||||
am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||
am.mantissa &= ~(am_mant_t(1) << binary_format<T>::mantissa_explicit_bits());
|
||||
if (am.power2 >= binary_format<T>::infinite_power()) {
|
||||
am.power2 = binary_format<T>::infinite_power();
|
||||
am.mantissa = 0;
|
||||
@ -138,11 +136,12 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
|
||||
|
||||
template <typename callback>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
|
||||
round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
|
||||
round_nearest_tie_even(adjusted_mantissa &am, am_pow_t shift,
|
||||
callback cb) noexcept {
|
||||
uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1;
|
||||
uint64_t const halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1);
|
||||
uint64_t truncated_bits = am.mantissa & mask;
|
||||
am_mant_t const mask = (shift == 64) ? std::numeric_limits<am_mant_t>::max()
|
||||
: (am_mant_t(1) << shift) - 1;
|
||||
am_mant_t const halfway = (shift == 0) ? 0 : am_mant_t(1) << (shift - 1);
|
||||
am_mant_t truncated_bits = am.mantissa & mask;
|
||||
bool is_above = truncated_bits > halfway;
|
||||
bool is_halfway = truncated_bits == halfway;
|
||||
|
||||
@ -155,11 +154,11 @@ round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
|
||||
am.power2 += shift;
|
||||
|
||||
bool is_odd = (am.mantissa & 1) == 1;
|
||||
am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
|
||||
am.mantissa += am_mant_t(cb(is_odd, is_halfway, is_above));
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
|
||||
round_down(adjusted_mantissa &am, int32_t shift) noexcept {
|
||||
round_down(adjusted_mantissa &am, am_pow_t shift) noexcept {
|
||||
if (shift == 64) {
|
||||
am.mantissa = 0;
|
||||
} else {
|
||||
@ -171,9 +170,9 @@ round_down(adjusted_mantissa &am, int32_t shift) noexcept {
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
|
||||
skip_zeros(UC const *&first, UC const *last) noexcept {
|
||||
uint64_t val;
|
||||
while (!cpp20_and_in_constexpr() &&
|
||||
std::distance(first, last) >= int_cmp_len<UC>()) {
|
||||
uint64_t val;
|
||||
::memcpy(&val, first, sizeof(uint64_t));
|
||||
if (val != int_cmp_zeros<UC>()) {
|
||||
break;
|
||||
@ -184,7 +183,7 @@ skip_zeros(UC const *&first, UC const *last) noexcept {
|
||||
if (*first != UC('0')) {
|
||||
break;
|
||||
}
|
||||
first++;
|
||||
++first;
|
||||
}
|
||||
}
|
||||
|
||||
@ -194,9 +193,9 @@ template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
|
||||
is_truncated(UC const *first, UC const *last) noexcept {
|
||||
// do 8-bit optimizations, can just compare to 8 literal 0s.
|
||||
uint64_t val;
|
||||
while (!cpp20_and_in_constexpr() &&
|
||||
std::distance(first, last) >= int_cmp_len<UC>()) {
|
||||
uint64_t val;
|
||||
::memcpy(&val, first, sizeof(uint64_t));
|
||||
if (val != int_cmp_zeros<UC>()) {
|
||||
return true;
|
||||
@ -220,8 +219,8 @@ is_truncated(span<UC const> s) noexcept {
|
||||
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
|
||||
parse_eight_digits(UC const *&p, limb &value, size_t &counter,
|
||||
size_t &count) noexcept {
|
||||
parse_eight_digits(UC const *&p, limb &value, am_digits &counter,
|
||||
am_digits &count) noexcept {
|
||||
value = value * 100000000 + parse_eight_digits_unrolled(p);
|
||||
p += 8;
|
||||
counter += 8;
|
||||
@ -230,12 +229,12 @@ parse_eight_digits(UC const *&p, limb &value, size_t &counter,
|
||||
|
||||
template <typename UC>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
|
||||
parse_one_digit(UC const *&p, limb &value, size_t &counter,
|
||||
size_t &count) noexcept {
|
||||
parse_one_digit(UC const *&p, limb &value, am_digits &counter,
|
||||
am_digits &count) noexcept {
|
||||
value = value * 10 + limb(*p - UC('0'));
|
||||
p++;
|
||||
counter++;
|
||||
count++;
|
||||
++p;
|
||||
++counter;
|
||||
++count;
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
|
||||
@ -245,28 +244,28 @@ add_native(bigint &big, limb power, limb value) noexcept {
|
||||
}
|
||||
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
|
||||
round_up_bigint(bigint &big, size_t &count) noexcept {
|
||||
round_up_bigint(bigint &big, am_digits &count) noexcept {
|
||||
// need to round-up the digits, but need to avoid rounding
|
||||
// ....9999 to ...10000, which could cause a false halfway point.
|
||||
add_native(big, 10, 1);
|
||||
count++;
|
||||
++count;
|
||||
}
|
||||
|
||||
// parse the significant digits into a big integer
|
||||
template <typename UC>
|
||||
inline FASTFLOAT_CONSTEXPR20 void
|
||||
parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
|
||||
size_t max_digits, size_t &digits) noexcept {
|
||||
template <typename T, typename UC>
|
||||
inline FASTFLOAT_CONSTEXPR20 am_digits
|
||||
parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num) noexcept {
|
||||
// try to minimize the number of big integer and scalar multiplication.
|
||||
// therefore, try to parse 8 digits at a time, and multiply by the largest
|
||||
// scalar value (9 or 19 digits) for each step.
|
||||
size_t counter = 0;
|
||||
digits = 0;
|
||||
constexpr am_digits max_digits = binary_format<T>::max_digits();
|
||||
am_digits counter = 0;
|
||||
am_digits digits = 0;
|
||||
limb value = 0;
|
||||
#ifdef FASTFLOAT_64BIT_LIMB
|
||||
size_t step = 19;
|
||||
constexpr am_digits step = 19;
|
||||
#else
|
||||
size_t step = 9;
|
||||
constexpr am_digits step = 9;
|
||||
#endif
|
||||
|
||||
// process all integer digits.
|
||||
@ -292,7 +291,7 @@ parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
|
||||
if (truncated) {
|
||||
round_up_bigint(result, digits);
|
||||
}
|
||||
return;
|
||||
return digits;
|
||||
} else {
|
||||
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||
counter = 0;
|
||||
@ -323,7 +322,7 @@ parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
|
||||
if (truncated) {
|
||||
round_up_bigint(result, digits);
|
||||
}
|
||||
return;
|
||||
return digits;
|
||||
} else {
|
||||
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||
counter = 0;
|
||||
@ -335,20 +334,21 @@ parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
|
||||
if (counter != 0) {
|
||||
add_native(result, limb(powers_of_ten_uint64[counter]), value);
|
||||
}
|
||||
return digits;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
|
||||
FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
|
||||
adjusted_mantissa answer;
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa positive_digit_comp(
|
||||
bigint &bigmant, adjusted_mantissa am, am_pow_t const exponent) noexcept {
|
||||
FASTFLOAT_ASSERT(bigmant.pow10(exponent));
|
||||
bool truncated;
|
||||
answer.mantissa = bigmant.hi64(truncated);
|
||||
int bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
binary_format<T>::minimum_exponent();
|
||||
answer.power2 = bigmant.bit_length() - 64 + bias;
|
||||
am.mantissa = bigmant.hi64(truncated);
|
||||
constexpr am_pow_t bias = binary_format<T>::mantissa_explicit_bits() -
|
||||
binary_format<T>::minimum_exponent();
|
||||
am.power2 =
|
||||
static_cast<fast_float::am_pow_t>(bigmant.bit_length() - 64 + bias);
|
||||
|
||||
round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
|
||||
round<T>(am, [truncated](adjusted_mantissa &a, am_pow_t shift) {
|
||||
round_nearest_tie_even(
|
||||
a, shift,
|
||||
[truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
|
||||
@ -357,7 +357,7 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
|
||||
});
|
||||
});
|
||||
|
||||
return answer;
|
||||
return am;
|
||||
}
|
||||
|
||||
// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
|
||||
@ -366,39 +366,40 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
|
||||
// we then need to scale by `2^(f- e)`, and then the two significant digits
|
||||
// are of the same magnitude.
|
||||
template <typename T>
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
|
||||
bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
|
||||
bigint &real_digits = bigmant;
|
||||
int32_t real_exp = exponent;
|
||||
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
negative_digit_comp(bigint &real_digits, adjusted_mantissa am,
|
||||
am_pow_t const real_exp) noexcept {
|
||||
// get the value of `b`, rounded down, and get a bigint representation of b+h
|
||||
adjusted_mantissa am_b = am;
|
||||
// gcc7 buf: use a lambda to remove the noexcept qualifier bug with
|
||||
// gcc7 bug: use a lambda to remove the noexcept qualifier bug with
|
||||
// -Wnoexcept-type.
|
||||
round<T>(am_b,
|
||||
[](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
|
||||
[](adjusted_mantissa &a, am_pow_t shift) { round_down(a, shift); });
|
||||
T b;
|
||||
to_float(false, am_b, b);
|
||||
adjusted_mantissa theor = to_extended_halfway(b);
|
||||
to_float(
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
false,
|
||||
#endif
|
||||
am_b, b);
|
||||
adjusted_mantissa const theor = to_extended_halfway(b);
|
||||
bigint theor_digits(theor.mantissa);
|
||||
int32_t theor_exp = theor.power2;
|
||||
am_pow_t const theor_exp = theor.power2;
|
||||
|
||||
// scale real digits and theor digits to be same power.
|
||||
int32_t pow2_exp = theor_exp - real_exp;
|
||||
uint32_t pow5_exp = uint32_t(-real_exp);
|
||||
auto const pow2_exp = theor_exp - real_exp;
|
||||
auto const pow5_exp = -real_exp;
|
||||
if (pow5_exp != 0) {
|
||||
FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
|
||||
}
|
||||
if (pow2_exp > 0) {
|
||||
FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
|
||||
FASTFLOAT_ASSERT(theor_digits.pow2(pow2_exp));
|
||||
} else if (pow2_exp < 0) {
|
||||
FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
|
||||
FASTFLOAT_ASSERT(real_digits.pow2(-pow2_exp));
|
||||
}
|
||||
|
||||
// compare digits, and use it to direct rounding
|
||||
int ord = real_digits.compare(theor_digits);
|
||||
adjusted_mantissa answer = am;
|
||||
round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
|
||||
auto const ord = real_digits.compare(theor_digits);
|
||||
round<T>(am, [ord](adjusted_mantissa &a, am_pow_t shift) {
|
||||
round_nearest_tie_even(
|
||||
a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
|
||||
(void)_; // not needed, since we've done our comparison
|
||||
@ -413,7 +414,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
|
||||
});
|
||||
});
|
||||
|
||||
return answer;
|
||||
return am;
|
||||
}
|
||||
|
||||
// parse the significant digits as a big integer to unambiguously round
|
||||
@ -430,21 +431,18 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
|
||||
// the actual digits. we then compare the big integer representations
|
||||
// of both, and use that to direct rounding.
|
||||
template <typename T, typename UC>
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
|
||||
digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
|
||||
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
|
||||
parsed_number_string_t<UC> const &num, adjusted_mantissa am) noexcept {
|
||||
// remove the invalid exponent bias
|
||||
am.power2 -= invalid_am_bias;
|
||||
|
||||
int32_t sci_exp =
|
||||
scientific_exponent(num.mantissa, static_cast<int32_t>(num.exponent));
|
||||
size_t max_digits = binary_format<T>::max_digits();
|
||||
size_t digits = 0;
|
||||
am_pow_t const sci_exp = scientific_exponent(num.mantissa, num.exponent);
|
||||
bigint bigmant;
|
||||
parse_mantissa(bigmant, num, max_digits, digits);
|
||||
am_digits const digits = parse_mantissa<T, UC>(bigmant, num);
|
||||
// can't underflow, since digits is at most max_digits.
|
||||
int32_t exponent = sci_exp + 1 - int32_t(digits);
|
||||
am_pow_t const exponent = sci_exp + 1 - static_cast<am_pow_t>(digits);
|
||||
if (exponent >= 0) {
|
||||
return positive_digit_comp<T>(bigmant, exponent);
|
||||
return positive_digit_comp<T>(bigmant, am, exponent);
|
||||
} else {
|
||||
return negative_digit_comp<T>(bigmant, am, exponent);
|
||||
}
|
||||
|
||||
@ -34,7 +34,7 @@ template <typename T, typename UC = char,
|
||||
typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>::value)>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars(UC const *first, UC const *last, T &value,
|
||||
chars_format fmt = chars_format::general) noexcept;
|
||||
chars_format const fmt = chars_format::general) noexcept;
|
||||
|
||||
/**
|
||||
* Like from_chars, but accepts an `options` argument to govern number parsing.
|
||||
@ -43,7 +43,7 @@ from_chars(UC const *first, UC const *last, T &value,
|
||||
template <typename T, typename UC = char>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars_advanced(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept;
|
||||
parse_options_t<UC> const options) noexcept;
|
||||
|
||||
/**
|
||||
* This function multiplies an integer number by a power of 10 and returns
|
||||
@ -59,9 +59,11 @@ from_chars_advanced(UC const *first, UC const *last, T &value,
|
||||
* `new` or `malloc`).
|
||||
*/
|
||||
FASTFLOAT_CONSTEXPR20 inline double
|
||||
integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
|
||||
integer_times_pow10(uint64_t const mantissa,
|
||||
int const decimal_exponent) noexcept;
|
||||
FASTFLOAT_CONSTEXPR20 inline double
|
||||
integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
|
||||
integer_times_pow10(int64_t const mantissa,
|
||||
int const decimal_exponent) noexcept;
|
||||
|
||||
/**
|
||||
* This function is a template overload of `integer_times_pow10()`
|
||||
@ -71,11 +73,13 @@ integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
|
||||
template <typename T>
|
||||
FASTFLOAT_CONSTEXPR20
|
||||
typename std::enable_if<is_supported_float_type<T>::value, T>::type
|
||||
integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
|
||||
integer_times_pow10(uint64_t const mantissa,
|
||||
int const decimal_exponent) noexcept;
|
||||
template <typename T>
|
||||
FASTFLOAT_CONSTEXPR20
|
||||
typename std::enable_if<is_supported_float_type<T>::value, T>::type
|
||||
integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
|
||||
integer_times_pow10(int64_t const mantissa,
|
||||
int const decimal_exponent) noexcept;
|
||||
|
||||
/**
|
||||
* from_chars for integer types.
|
||||
@ -83,7 +87,8 @@ FASTFLOAT_CONSTEXPR20
|
||||
template <typename T, typename UC = char,
|
||||
typename = FASTFLOAT_ENABLE_IF(is_supported_integer_type<T>::value)>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept;
|
||||
from_chars(UC const *first, UC const *last, T &value,
|
||||
int const base = 10) noexcept;
|
||||
|
||||
} // namespace fast_float
|
||||
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
#ifndef FASTFLOAT_FAST_TABLE_H
|
||||
#define FASTFLOAT_FAST_TABLE_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace fast_float {
|
||||
|
||||
/**
|
||||
@ -30,15 +28,14 @@ namespace fast_float {
|
||||
* of 5 greater than 308.
|
||||
*/
|
||||
template <class unused = void> struct powers_template {
|
||||
|
||||
constexpr static int smallest_power_of_five =
|
||||
constexpr static am_pow_t smallest_power_of_five =
|
||||
binary_format<double>::smallest_power_of_ten();
|
||||
constexpr static int largest_power_of_five =
|
||||
constexpr static am_pow_t largest_power_of_five =
|
||||
binary_format<double>::largest_power_of_ten();
|
||||
constexpr static int number_of_entries =
|
||||
constexpr static am_pow_t number_of_entries =
|
||||
2 * (largest_power_of_five - smallest_power_of_five + 1);
|
||||
// Powers of five from 5^-342 all the way to 5^308 rounded toward one.
|
||||
constexpr static uint64_t power_of_five_128[number_of_entries] = {
|
||||
constexpr static am_mant_t power_of_five_128[number_of_entries] = {
|
||||
0xeef453d6923bd65a, 0x113faa2906a13b3f,
|
||||
0x9558b4661b6565f8, 0x4ac7ca59a424c507,
|
||||
0xbaaee17fa23ebf76, 0x5d79bcf00d2df649,
|
||||
@ -696,7 +693,7 @@ template <class unused = void> struct powers_template {
|
||||
#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE
|
||||
|
||||
template <class unused>
|
||||
constexpr uint64_t
|
||||
constexpr am_mant_t
|
||||
powers_template<unused>::power_of_five_128[number_of_entries];
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -14,26 +14,30 @@
|
||||
namespace fast_float {
|
||||
|
||||
namespace detail {
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
/**
|
||||
* Special case +inf, -inf, nan, infinity, -infinity.
|
||||
* Special case inf, +inf, -inf, nan, infinity, -infinity.
|
||||
* The case comparisons could be made much faster given that we know that the
|
||||
* strings are null-free and fixed.
|
||||
**/
|
||||
template <typename T, typename UC>
|
||||
from_chars_result_t<UC>
|
||||
FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last,
|
||||
T &value, chars_format fmt) noexcept {
|
||||
T &value,
|
||||
const chars_format fmt) noexcept {
|
||||
from_chars_result_t<UC> answer{};
|
||||
answer.ptr = first;
|
||||
answer.ec = std::errc(); // be optimistic
|
||||
// assume first < last, so dereference without checks;
|
||||
|
||||
FASTFLOAT_ASSUME(first < last); // so dereference without checks
|
||||
|
||||
bool const minusSign = (*first == UC('-'));
|
||||
// C++17 20.19.3.(7.1) explicitly forbids '+' sign here
|
||||
if ((*first == UC('-')) ||
|
||||
(uint64_t(fmt & chars_format::allow_leading_plus) &&
|
||||
(*first == UC('+')))) {
|
||||
if (minusSign || ((chars_format_t(fmt & chars_format::allow_leading_plus)) &&
|
||||
(*first == UC('+')))) {
|
||||
++first;
|
||||
}
|
||||
|
||||
if (last - first >= 3) {
|
||||
if (fastfloat_strncasecmp3(first, str_const_nan<UC>())) {
|
||||
answer.ptr = (first += 3);
|
||||
@ -42,7 +46,7 @@ from_chars_result_t<UC>
|
||||
// Check for possible nan(n-char-seq-opt), C++17 20.19.3.7,
|
||||
// C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
|
||||
if (first != last && *first == UC('(')) {
|
||||
for (UC const *ptr = first + 1; ptr != last; ++ptr) {
|
||||
for (auto const *ptr = first + 1; ptr != last; ++ptr) {
|
||||
if (*ptr == UC(')')) {
|
||||
answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
|
||||
break;
|
||||
@ -69,7 +73,9 @@ from_chars_result_t<UC>
|
||||
answer.ec = std::errc::invalid_argument;
|
||||
return answer;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
/**
|
||||
* Returns true if the floating-point rounding mode is to 'nearest'.
|
||||
* It is the default on most systems. This function is meant to be inexpensive.
|
||||
@ -134,6 +140,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept {
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@ -141,7 +148,7 @@ template <typename T> struct from_chars_caller {
|
||||
template <typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
|
||||
call(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
return from_chars_advanced(first, last, value, options);
|
||||
}
|
||||
};
|
||||
@ -151,7 +158,7 @@ template <> struct from_chars_caller<std::float32_t> {
|
||||
template <typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
|
||||
call(UC const *first, UC const *last, std::float32_t &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
// if std::float32_t is defined, and we are in C++23 mode; macro set for
|
||||
// float32; set value to float due to equivalence between float and
|
||||
// float32_t
|
||||
@ -168,7 +175,7 @@ template <> struct from_chars_caller<std::float64_t> {
|
||||
template <typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
|
||||
call(UC const *first, UC const *last, std::float64_t &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
// if std::float64_t is defined, and we are in C++23 mode; macro set for
|
||||
// float64; set value as double due to equivalence between double and
|
||||
// float64_t
|
||||
@ -183,14 +190,17 @@ template <> struct from_chars_caller<std::float64_t> {
|
||||
template <typename T, typename UC, typename>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars(UC const *first, UC const *last, T &value,
|
||||
chars_format fmt /*= chars_format::general*/) noexcept {
|
||||
chars_format const fmt /*= chars_format::general*/) noexcept {
|
||||
return from_chars_caller<T>::call(first, last, value,
|
||||
parse_options_t<UC>(fmt));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
|
||||
clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
|
||||
clinger_fast_path_impl(am_mant_t const mantissa, am_pow_t const exponent,
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
bool const is_negative,
|
||||
#endif
|
||||
T &value) noexcept {
|
||||
// The implementation of the Clinger's fast path is convoluted because
|
||||
// we want round-to-nearest in all cases, irrespective of the rounding mode
|
||||
@ -206,7 +216,9 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
|
||||
// We could check it first (before the previous branch), but
|
||||
// there might be performance advantages at having the check
|
||||
// be last.
|
||||
#ifndef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
|
||||
#endif
|
||||
// We have that fegetround() == FE_TONEAREST.
|
||||
// Next is Clinger's fast path.
|
||||
if (mantissa <= binary_format<T>::max_mantissa_fast_path()) {
|
||||
@ -216,11 +228,14 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
|
||||
} else {
|
||||
value = value * binary_format<T>::exact_power_of_ten(exponent);
|
||||
}
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
if (is_negative) {
|
||||
value = -value;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
#ifndef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
|
||||
} else {
|
||||
// We do not have that fegetround() == FE_TONEAREST.
|
||||
// Next is a modified Clinger's fast path, inspired by Jakub Jelínek's
|
||||
@ -230,17 +245,24 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
|
||||
#if defined(__clang__) || defined(FASTFLOAT_32BIT)
|
||||
// Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
|
||||
if (mantissa == 0) {
|
||||
value = is_negative ? T(-0.) : T(0.);
|
||||
value =
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
is_negative ? T(-0.) :
|
||||
#endif
|
||||
T(0.);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
value = T(mantissa) * binary_format<T>::exact_power_of_ten(exponent);
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
if (is_negative) {
|
||||
value = -value;
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -252,7 +274,7 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
|
||||
*/
|
||||
template <typename T, typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
|
||||
from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
|
||||
static_assert(is_supported_float_type<T>::value,
|
||||
"only some floating-point types are supported");
|
||||
static_assert(is_supported_char_type<UC>::value,
|
||||
@ -263,8 +285,11 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
|
||||
answer.ec = std::errc(); // be optimistic
|
||||
answer.ptr = pns.lastmatch;
|
||||
|
||||
if (!pns.too_many_digits &&
|
||||
clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value))
|
||||
if (!pns.too_many_digits && clinger_fast_path_impl(pns.mantissa, pns.exponent,
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
pns.negative,
|
||||
#endif
|
||||
value))
|
||||
return answer;
|
||||
|
||||
adjusted_mantissa am =
|
||||
@ -280,7 +305,11 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
|
||||
if (am.power2 < 0) {
|
||||
am = digit_comp<T>(pns, am);
|
||||
}
|
||||
to_float(pns.negative, am, value);
|
||||
to_float(
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
pns.negative,
|
||||
#endif
|
||||
am, value);
|
||||
// Test for over/underflow.
|
||||
if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) ||
|
||||
am.power2 == binary_format<T>::infinite_power()) {
|
||||
@ -292,38 +321,51 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
|
||||
template <typename T, typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars_float_advanced(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
static_assert(is_supported_float_type<T>::value,
|
||||
"only some floating-point types are supported");
|
||||
static_assert(is_supported_char_type<UC>::value,
|
||||
"only char, wchar_t, char16_t and char32_t are supported");
|
||||
|
||||
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
|
||||
|
||||
from_chars_result_t<UC> answer;
|
||||
if (uint64_t(fmt & chars_format::skip_white_space)) {
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
if (chars_format_t(options.format & chars_format::skip_white_space)) {
|
||||
while ((first != last) && fast_float::is_space(*first)) {
|
||||
first++;
|
||||
++first;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#ifdef FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
// We are in parser code with external loop that checks bounds.
|
||||
FASTFLOAT_ASSUME(first < last);
|
||||
#endif
|
||||
#endif
|
||||
#ifndef FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
if (first == last) {
|
||||
answer.ec = std::errc::invalid_argument;
|
||||
answer.ptr = first;
|
||||
return answer;
|
||||
}
|
||||
parsed_number_string_t<UC> pns =
|
||||
uint64_t(fmt & detail::basic_json_fmt)
|
||||
#endif
|
||||
parsed_number_string_t<UC> const pns =
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
(chars_format_t(options.format & detail::basic_json_fmt))
|
||||
? parse_number_string<true, UC>(first, last, options)
|
||||
: parse_number_string<false, UC>(first, last, options);
|
||||
if (!pns.valid) {
|
||||
if (uint64_t(fmt & chars_format::no_infnan)) {
|
||||
:
|
||||
#endif
|
||||
parse_number_string<false, UC>(first, last, options);
|
||||
if (pns.invalid) {
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
if (chars_format_t(options.format & chars_format::no_infnan)) {
|
||||
#endif
|
||||
answer.ec = std::errc::invalid_argument;
|
||||
answer.ptr = first;
|
||||
return answer;
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
} else {
|
||||
return detail::parse_infnan(first, last, value, fmt);
|
||||
return detail::parse_infnan(first, last, value, options.format);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// call overload that takes parsed_number_string_t directly.
|
||||
@ -332,55 +374,80 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
|
||||
|
||||
template <typename T, typename UC, typename>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
|
||||
|
||||
from_chars(UC const *first, UC const *last, T &value, int const base) noexcept {
|
||||
static_assert(is_supported_integer_type<T>::value,
|
||||
"only integer types are supported");
|
||||
static_assert(is_supported_char_type<UC>::value,
|
||||
"only char, wchar_t, char16_t and char32_t are supported");
|
||||
|
||||
parse_options_t<UC> options;
|
||||
options.base = base;
|
||||
parse_options_t<UC> const options(chars_format::general, UC('.'),
|
||||
static_cast<base_t>(base));
|
||||
return from_chars_advanced(first, last, value, options);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
FASTFLOAT_CONSTEXPR20
|
||||
typename std::enable_if<is_supported_float_type<T>::value, T>::type
|
||||
integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(uint64_t const mantissa,
|
||||
int const decimal_exponent) noexcept {
|
||||
T value;
|
||||
if (clinger_fast_path_impl(mantissa, decimal_exponent, false, value))
|
||||
const auto exponent = static_cast<am_pow_t>(decimal_exponent);
|
||||
if (clinger_fast_path_impl(mantissa, exponent,
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
false,
|
||||
#endif
|
||||
value))
|
||||
return value;
|
||||
|
||||
adjusted_mantissa am =
|
||||
compute_float<binary_format<T>>(decimal_exponent, mantissa);
|
||||
to_float(false, am, value);
|
||||
adjusted_mantissa am = compute_float<binary_format<T>>(exponent, mantissa);
|
||||
to_float(
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
false,
|
||||
#endif
|
||||
am, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
FASTFLOAT_CONSTEXPR20
|
||||
typename std::enable_if<is_supported_float_type<T>::value, T>::type
|
||||
integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
|
||||
const bool is_negative = mantissa < 0;
|
||||
const uint64_t m = static_cast<uint64_t>(is_negative ? -mantissa : mantissa);
|
||||
|
||||
integer_times_pow10(int64_t const mantissa,
|
||||
int const decimal_exponent) noexcept {
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
const auto is_negative = mantissa < 0;
|
||||
const auto m = static_cast<am_mant_t>(is_negative ? -mantissa : mantissa);
|
||||
#else
|
||||
FASTFLOAT_ASSUME(mantissa >= 0);
|
||||
const auto m = static_cast<am_mant_t>(mantissa);
|
||||
#endif
|
||||
const auto exponent = static_cast<am_pow_t>(decimal_exponent);
|
||||
T value;
|
||||
if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value))
|
||||
if (clinger_fast_path_impl(m, exponent,
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
is_negative,
|
||||
#endif
|
||||
value))
|
||||
return value;
|
||||
|
||||
adjusted_mantissa am = compute_float<binary_format<T>>(decimal_exponent, m);
|
||||
to_float(is_negative, am, value);
|
||||
adjusted_mantissa const am = compute_float<binary_format<T>>(exponent, m);
|
||||
|
||||
to_float(
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
is_negative,
|
||||
#endif
|
||||
am, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 inline double
|
||||
integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(uint64_t const mantissa,
|
||||
int const decimal_exponent) noexcept {
|
||||
return integer_times_pow10<double>(mantissa, decimal_exponent);
|
||||
}
|
||||
|
||||
FASTFLOAT_CONSTEXPR20 inline double
|
||||
integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(int64_t const mantissa,
|
||||
int const decimal_exponent) noexcept {
|
||||
return integer_times_pow10<double>(mantissa, decimal_exponent);
|
||||
}
|
||||
|
||||
@ -392,7 +459,8 @@ FASTFLOAT_CONSTEXPR20
|
||||
std::is_integral<Int>::value &&
|
||||
!std::is_signed<Int>::value,
|
||||
T>::type
|
||||
integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(Int const mantissa,
|
||||
int const decimal_exponent) noexcept {
|
||||
return integer_times_pow10<T>(static_cast<uint64_t>(mantissa),
|
||||
decimal_exponent);
|
||||
}
|
||||
@ -403,7 +471,8 @@ FASTFLOAT_CONSTEXPR20
|
||||
std::is_integral<Int>::value &&
|
||||
std::is_signed<Int>::value,
|
||||
T>::type
|
||||
integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(Int const mantissa,
|
||||
int const decimal_exponent) noexcept {
|
||||
return integer_times_pow10<T>(static_cast<int64_t>(mantissa),
|
||||
decimal_exponent);
|
||||
}
|
||||
@ -411,37 +480,44 @@ FASTFLOAT_CONSTEXPR20
|
||||
template <typename Int>
|
||||
FASTFLOAT_CONSTEXPR20 typename std::enable_if<
|
||||
std::is_integral<Int>::value && !std::is_signed<Int>::value, double>::type
|
||||
integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(Int const mantissa, int const decimal_exponent) noexcept {
|
||||
return integer_times_pow10(static_cast<uint64_t>(mantissa), decimal_exponent);
|
||||
}
|
||||
|
||||
template <typename Int>
|
||||
FASTFLOAT_CONSTEXPR20 typename std::enable_if<
|
||||
std::is_integral<Int>::value && std::is_signed<Int>::value, double>::type
|
||||
integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
|
||||
integer_times_pow10(Int const mantissa, int const decimal_exponent) noexcept {
|
||||
return integer_times_pow10(static_cast<int64_t>(mantissa), decimal_exponent);
|
||||
}
|
||||
|
||||
template <typename T, typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars_int_advanced(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
static_assert(is_supported_integer_type<T>::value,
|
||||
"only integer types are supported");
|
||||
static_assert(is_supported_char_type<UC>::value,
|
||||
"only char, wchar_t, char16_t and char32_t are supported");
|
||||
|
||||
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
|
||||
int const base = options.base;
|
||||
|
||||
from_chars_result_t<UC> answer;
|
||||
if (uint64_t(fmt & chars_format::skip_white_space)) {
|
||||
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
|
||||
if (chars_format_t(options.format & chars_format::skip_white_space)) {
|
||||
while ((first != last) && fast_float::is_space(*first)) {
|
||||
first++;
|
||||
++first;
|
||||
}
|
||||
}
|
||||
if (first == last || base < 2 || base > 36) {
|
||||
#else
|
||||
#ifdef FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
// We are in parser code with external loop that checks bounds.
|
||||
FASTFLOAT_ASSUME(first < last);
|
||||
#endif
|
||||
#endif
|
||||
if (
|
||||
#ifndef FASTFLOAT_ISNOT_CHECKED_BOUNDS
|
||||
first == last ||
|
||||
#endif
|
||||
options.base < 2 || options.base > 36) {
|
||||
from_chars_result_t<UC> answer;
|
||||
answer.ec = std::errc::invalid_argument;
|
||||
answer.ptr = first;
|
||||
return answer;
|
||||
@ -458,7 +534,7 @@ template <> struct from_chars_advanced_caller<1> {
|
||||
template <typename T, typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
|
||||
call(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
return from_chars_float_advanced(first, last, value, options);
|
||||
}
|
||||
};
|
||||
@ -467,7 +543,7 @@ template <> struct from_chars_advanced_caller<2> {
|
||||
template <typename T, typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
|
||||
call(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
return from_chars_int_advanced(first, last, value, options);
|
||||
}
|
||||
};
|
||||
@ -475,7 +551,7 @@ template <> struct from_chars_advanced_caller<2> {
|
||||
template <typename T, typename UC>
|
||||
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
|
||||
from_chars_advanced(UC const *first, UC const *last, T &value,
|
||||
parse_options_t<UC> options) noexcept {
|
||||
parse_options_t<UC> const options) noexcept {
|
||||
return from_chars_advanced_caller<
|
||||
size_t(is_supported_float_type<T>::value) +
|
||||
2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
|
||||
|
||||
@ -69,7 +69,7 @@ template <typename T> std::string fHexAndDec(T v) {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
char const *round_name(int d) {
|
||||
constexpr std::string_view const round_name(int d) {
|
||||
switch (d) {
|
||||
case FE_UPWARD:
|
||||
return "FE_UPWARD";
|
||||
@ -2328,7 +2328,7 @@ TEST_CASE("integer_times_pow10") {
|
||||
|
||||
for (int mode : {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}) {
|
||||
fesetround(mode);
|
||||
INFO("fesetround(): " << std::string{round_name(mode)});
|
||||
INFO("fesetround(): " << round_name(mode));
|
||||
|
||||
struct Guard {
|
||||
~Guard() { fesetround(FE_TONEAREST); }
|
||||
|
||||
@ -136,7 +136,7 @@ int main() {
|
||||
fast_float::parse_options(
|
||||
fast_float::chars_format::json |
|
||||
fast_float::chars_format::allow_leading_plus)); // should be ignored
|
||||
if (answer.valid) {
|
||||
if (!answer.invalid) {
|
||||
std::cerr << "json parse accepted invalid json " << f << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
@ -167,4 +167,4 @@ int main() {
|
||||
#endif
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
@ -51,7 +51,6 @@ template <typename T> bool test() {
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
||||
std::cout << "32 bits checks" << std::endl;
|
||||
Assert(test<float>());
|
||||
|
||||
|
||||
@ -198,7 +198,7 @@ bool tester(uint64_t seed, size_t volume) {
|
||||
char buffer[4096]; // large buffer (can't overflow)
|
||||
RandomEngine rand(seed);
|
||||
for (size_t i = 0; i < volume; i++) {
|
||||
if ((i % 100000) == 0) {
|
||||
if ((i % 1000000) == 0) {
|
||||
std::cout << ".";
|
||||
std::cout.flush();
|
||||
}
|
||||
@ -256,10 +256,12 @@ bool tester(uint64_t seed, size_t volume) {
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
||||
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \
|
||||
defined(sun) || defined(__sun)
|
||||
std::cout << "Warning: msys/cygwin or solaris detected." << std::endl;
|
||||
std::cout << "Warning: msys/cygwin or solaris detected. This particular test "
|
||||
"is likely to generate false failures due to our reliance on "
|
||||
"the underlying runtime library."
|
||||
<< std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
#else
|
||||
if (tester(1234344, 100000000)) {
|
||||
|
||||
@ -8,9 +8,9 @@
|
||||
#include <vector>
|
||||
|
||||
struct test_data {
|
||||
std::string input;
|
||||
bool expected_success;
|
||||
double expected_result;
|
||||
const std::string input;
|
||||
const bool expected_success;
|
||||
const double expected_result;
|
||||
};
|
||||
|
||||
bool eddelbuettel() {
|
||||
@ -51,10 +51,10 @@ bool eddelbuettel() {
|
||||
{"-+inf", false, 0.0},
|
||||
{"-+nan", false, 0.0},
|
||||
};
|
||||
for (size_t i = 0; i < test_datas.size(); i++) {
|
||||
auto const &input = test_datas[i].input;
|
||||
auto const expected_success = test_datas[i].expected_success;
|
||||
auto const expected_result = test_datas[i].expected_result;
|
||||
for (const auto &i : test_datas) {
|
||||
auto const &input = i.input;
|
||||
auto const expected_success = i.expected_success;
|
||||
auto const expected_result = i.expected_result;
|
||||
double result;
|
||||
// answer contains an error code and a pointer to the end of the
|
||||
// parsed region (on success).
|
||||
|
||||
@ -253,9 +253,9 @@ bool tester(uint64_t seed, size_t volume) {
|
||||
int main() {
|
||||
#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) || \
|
||||
defined(sun) || defined(__sun)
|
||||
std::cout << "Warning: msys/cygwin detected. This particular test is likely "
|
||||
"to generate false failures due to our reliance on the "
|
||||
"underlying runtime library."
|
||||
std::cout << "Warning: msys/cygwin or solaris detected. This particular test "
|
||||
"is likely to generate false failures due to our reliance on "
|
||||
"the underlying runtime library."
|
||||
<< std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
#else
|
||||
@ -263,6 +263,7 @@ int main() {
|
||||
std::cout << "All tests ok." << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
std::cout << "Failure." << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
|
||||
#endif
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user