Merge 6bc8902d94959d720d9cf1b2cd99813f8595a76d into 221a4920db7d68d33ab9794af602daef19667351

2026-02-09 11:16:45 +08:00 · 2026-02-05 01:16:34 +08:00 · 2026-02-05 01:16:34 +08:00 · 27cbb3ada4
commit 27cbb3ada4
parent 221a4920db 6bc8902d94
21 changed files with 1469 additions and 836 deletions
--- a/1
+++ b/1
@ -9,3 +9,4 @@ Jan Pharago
 Maya Warrier
 Taha Khokhar
 Anders Dalvander
+Elle Solomina
--- a/README.md
+++ b/README.md
@ -1,7 +1,14 @@

 ## fast_float number parsing library: 4x faster than strtod
-
-[![Ubuntu 22.04 CI (GCC 11)](https://github.com/fastfloat/fast_float/actions/workflows/ubuntu22.yml/badge.svg)](https://github.com/fastfloat/fast_float/actions/workflows/ubuntu22.yml)
+[![Ubuntu 22.04](https://github.com/irainman/fast_float/actions/workflows/ubuntu22.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/ubuntu22.yml)
+[![Ubuntu 22.04 clang](https://github.com/irainman/fast_float/actions/workflows/ubuntu22-clang.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/ubuntu22-clang.yml)
+[![Ubuntu 24.04](https://github.com/irainman/fast_float/actions/workflows/ubuntu24.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/ubuntu24.yml)
+[![Ubuntu 24.04 C++20](https://github.com/irainman/fast_float/actions/workflows/ubuntu24-cxx20.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/ubuntu24-cxx20.yml)
+[![Alpine](https://github.com/irainman/fast_float/actions/workflows/alpine.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/alpine.yml)
+[![vs17](https://github.com/irainman/fast_float/actions/workflows/vs17-ci.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/vs17-ci.yml)
+[![vs17 C++20](https://github.com/irainman/fast_float/actions/workflows/vs17-cxx20.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/vs17-cxx20.yml)
+[![vs17 clang](https://github.com/irainman/fast_float/actions/workflows/vs17-clang-ci.yml/badge.svg)](https://github.com/irainman/fast_float/actions/workflows/vs17-clang-ci.yml)
+[![CodeFactor](https://www.codefactor.io/repository/github/irainman/fast_float/badge)](https://www.codefactor.io/repository/github/irainman/fast_float)

 The fast_float library provides fast header-only implementations for the C++
 from_chars functions for `float` and `double` types as well as integer types.
@ -35,7 +42,7 @@ struct from_chars_result {
 };
 ```

-It parses the character sequence `[first, last)` for a number. It parses
+It parses the character sequence `[first, last)` for a number. It parses
 floating-point numbers expecting a locale-independent format equivalent to the
 C++17 from_chars function. The resulting floating-point value is the closest
 floating-point value (using either `float` or `double`), using the "round to
@ -48,7 +55,8 @@ parsed value. In case of error, the returned `ec` contains a representative
 error, otherwise the default (`std::errc()`) value is stored.

 The implementation does not throw and does not allocate memory (e.g., with `new`
-or `malloc`).
+or `malloc`), so it can be used in kernel, embedded, and other scenarios that
+rely on such behavior.

 It will parse infinity and nan values.

@ -291,7 +299,7 @@ int main() {
 }
 ```

-## Advanced options: using commas as decimal separator, JSON and Fortran
+## Advanced options: using commas as decimal separator, parsing JSON, Fortran and more

 The C++ standard stipulates that `from_chars` has to be locale-independent. In
 particular, the decimal separator has to be the period (`.`). However, some
@ -380,6 +388,42 @@ int main() {
 }
 ```

+## Additional options to maximize performance and reduce size (made by HedgehogInTheCPP)
+
+A common use case is a mathematical or other abstract syntax tree (AST)-like parser that already handles
+the sign and any other symbols preceding a number itself. In this case you can use FastFloat to parse only positive numbers
+in all supported formats by defining the macro `FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN`, which significantly reduces the code size
+and improves performance. You can also define the macro `FASTFLOAT_ISNOT_CHECKED_BOUNDS` if your code already checks bounds;
+this is very likely, because a parser usually has to inspect the first character itself before parsing a number. Additionally, you can define
+the macro `FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED` if you only need the `FE_TONEAREST` rounding mode when parsing; this option
+also improves performance a bit and reduces code size. The high-performance example below also uses the [fmt library](https://github.com/fmtlib/fmt), which
+supports all C++ standards since C++11. Using `string_view` (available since C++17) wherever possible is also recommended,
+and for maximum performance, use the latest compiler and the latest C++ standard with maximum optimization:
+```
+-O3 -DNDEBUG + LTO
+```
+```C++
+#define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+#define FASTFLOAT_ISNOT_CHECKED_BOUNDS
+#define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
+#include "fast_float/fast_float.h"
+#include "fmt/base.h"
+#include <string_view>
+
+int main() {
+  std::string_view input = "23.14069263277926900572";
+  double result;
+  auto answer = fast_float::from_chars(input.data(), input.data() + input.size(), result);
+  if ((answer.ec != std::errc()) || ((result != 23.14069263277927 /*properly rounded value */)))
+  {
+    fmt::print(stderr, "parsing failure!\n    the number {}.", result);
+    return 1;
+  }
+  fmt::print("parsed the number {}.", result);
+  return 0;
+}
+```
+
 ## Multiplication of an integer by a power of 10
 An integer `W` can be multiplied by a power of ten `10^Q` and
 converted to `double` with correctly rounded value
@ -424,7 +468,6 @@ float: 12345678 * 10^23 = 1.23456782e+30 (==expected)
 Overloads of `fast_float::integer_times_pow10()` are provided for
 signed and unsigned integer types: `int64_t`, `uint64_t`, etc.

-
 ## Users and Related Work

 The fast_float library is part of:
--- a/benchmarks/bench_ip.cpp
+++ b/benchmarks/bench_ip.cpp
@ -1,3 +1,8 @@
+
+// #define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+// #define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
+// #define FASTFLOAT_ISNOT_CHECKED_BOUNDS
+
 #include "counters/bench.h"
 #include "fast_float/fast_float.h"
 #include <charconv>
--- a/benchmarks/benchmark.cpp
+++ b/benchmarks/benchmark.cpp
@ -1,10 +1,13 @@
+
+// #define FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+// #define FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
+// #define FASTFLOAT_ISNOT_CHECKED_BOUNDS
+
 #if defined(__linux__) || (__APPLE__ && __aarch64__)
 #define USING_COUNTERS
 #endif
 #include "counters/event_counter.h"
 #include <algorithm>
-#include "fast_float/fast_float.h"
-#include <chrono>
 #include <climits>
 #include <cmath>
 #include <cstdint>
@ -19,15 +22,17 @@
 #include <sstream>
 #include <stdio.h>
 #include <string>
-#include <vector>
 #include <locale.h>

-template <typename CharT>
-double findmax_fastfloat64(std::vector<std::basic_string<CharT>> &s) {
-  double answer = 0;
-  double x = 0;
+#include "fast_float/fast_float.h"
+
+template <typename CharT, typename Value>
+Value findmax_fastfloat(std::vector<std::basic_string<CharT>> &s) {
+  Value answer = 0;
+  Value x = 0;
  for (auto &st : s) {
    auto [p, ec] = fast_float::from_chars(st.data(), st.data() + st.size(), x);
+
    if (p == st.data()) {
      throw std::runtime_error("bug in findmax_fastfloat");
    }
@ -36,42 +41,30 @@ double findmax_fastfloat64(std::vector<std::basic_string<CharT>> &s) {
  return answer;
 }

-template <typename CharT>
-double findmax_fastfloat32(std::vector<std::basic_string<CharT>> &s) {
-  float answer = 0;
-  float x = 0;
-  for (auto &st : s) {
-    auto [p, ec] = fast_float::from_chars(st.data(), st.data() + st.size(), x);
-    if (p == st.data()) {
-      throw std::runtime_error("bug in findmax_fastfloat");
-    }
-    answer = answer > x ? answer : x;
-  }
-  return answer;
-}
+#ifdef USING_COUNTERS

 counters::event_collector collector{};

-#ifdef USING_COUNTERS
 template <class T, class CharT>
 std::vector<counters::event_count>
 time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
-           size_t repeat) {
+           uint32_t repeat) {
  std::vector<counters::event_count> aggregate;
  bool printed_bug = false;
-  for (size_t i = 0; i < repeat; i++) {
+  for (uint32_t i = 0; i != repeat; ++i) {
    collector.start();
-    double ts = function(lines);
+    auto const ts = function(lines);
+    aggregate.push_back(collector.end());
+
    if (ts == 0 && !printed_bug) {
      printf("bug\n");
      printed_bug = true;
    }
-    aggregate.push_back(collector.end());
  }
  return aggregate;
 }

-void pretty_print(double volume, size_t number_of_floats, std::string name,
+void pretty_print(uint64_t volume, size_t number_of_floats, std::string name,
                  std::vector<counters::event_count> events) {
  double volumeMB = volume / (1024. * 1024.);
  double average_ns{0};
@ -139,25 +132,27 @@ time_it_ns(std::vector<std::basic_string<CharT>> &lines, T const &function,
  double average = 0;
  double min_value = DBL_MAX;
  bool printed_bug = false;
-  for (size_t i = 0; i < repeat; i++) {
+  for (size_t i = 0; i != repeat; ++i) {
    t1 = std::chrono::high_resolution_clock::now();
-    double ts = function(lines);
+    auto const ts = function(lines);
+    t2 = std::chrono::high_resolution_clock::now();
+
+    double const dif = static_cast<double>(
+        std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count());
+    average += dif;
+    min_value = min_value < dif ? min_value : dif;
+
    if (ts == 0 && !printed_bug) {
      printf("bug\n");
      printed_bug = true;
    }
-    t2 = std::chrono::high_resolution_clock::now();
-    double dif =
-        std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
-    average += dif;
-    min_value = min_value < dif ? min_value : dif;
  }
  average /= repeat;
  return std::make_pair(min_value, average);
 }

-void pretty_print(double volume, size_t number_of_floats, std::string name,
-                  std::pair<double, double> result) {
+void pretty_print(uint64_t volume, size_t number_of_floats,
+                  std::string const &name, std::pair<double, double> result) {
  double volumeMB = volume / (1024. * 1024.);
  printf("%-40s: %8.2f MB/s (+/- %.1f %%) ", name.data(),
         volumeMB * 1000000000 / result.first,
@ -168,10 +163,10 @@ void pretty_print(double volume, size_t number_of_floats, std::string name,
 #endif

 // this is okay, all chars are ASCII
-inline std::u16string widen(std::string line) {
+inline std::u16string widen(std::string const &line) {
  std::u16string u16line;
  u16line.resize(line.size());
-  for (size_t i = 0; i < line.size(); ++i) {
+  for (uint32_t i = 0; i != line.size(); ++i) {
    u16line[i] = char16_t(line[i]);
  }
  return u16line;
@ -181,28 +176,29 @@ std::vector<std::u16string> widen(const std::vector<std::string> &lines) {
  std::vector<std::u16string> u16lines;
  u16lines.reserve(lines.size());
  for (auto const &line : lines) {
-    u16lines.push_back(widen(line));
+    u16lines.emplace_back(widen(line));
  }
  return u16lines;
 }

 void process(std::vector<std::string> &lines, size_t volume) {
-  size_t repeat = 1000;
+  size_t constexpr repeat = 1000;
  double volumeMB = volume / (1024. * 1024.);
  std::cout << "ASCII volume = " << volumeMB << " MB " << std::endl;
  pretty_print(volume, lines.size(), "fastfloat (64)",
-               time_it_ns(lines, findmax_fastfloat64<char>, repeat));
+               time_it_ns(lines, findmax_fastfloat<char, double>, repeat));
  pretty_print(volume, lines.size(), "fastfloat (32)",
-               time_it_ns(lines, findmax_fastfloat32<char>, repeat));
+               time_it_ns(lines, findmax_fastfloat<char, float>, repeat));

  std::vector<std::u16string> lines16 = widen(lines);
  volume = 2 * volume;
  volumeMB = volume / (1024. * 1024.);
  std::cout << "UTF-16 volume = " << volumeMB << " MB " << std::endl;
-  pretty_print(volume, lines.size(), "fastfloat (64)",
-               time_it_ns(lines16, findmax_fastfloat64<char16_t>, repeat));
+  pretty_print(
+      volume, lines.size(), "fastfloat (64)",
+      time_it_ns(lines16, findmax_fastfloat<char16_t, double>, repeat));
  pretty_print(volume, lines.size(), "fastfloat (32)",
-               time_it_ns(lines16, findmax_fastfloat32<char16_t>, repeat));
+               time_it_ns(lines16, findmax_fastfloat<char16_t, float>, repeat));
 }

 void fileload(std::string filename) {
@ -216,17 +212,38 @@ void fileload(std::string filename) {
  std::cout << "#### " << std::endl;
  std::string line;
  std::vector<std::string> lines;
-  lines.reserve(10000); // let us reserve plenty of memory.
+  lines.reserve(120000); // let us reserve plenty of memory.
  size_t volume = 0;
  while (getline(inputfile, line)) {
+#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+    if (line[0] == '-') {
+      line.erase(0, 1);
+    }
+#endif
    volume += line.size();
-    lines.push_back(line);
+    lines.emplace_back(line);
  }
  std::cout << "# read " << lines.size() << " lines " << std::endl;
  process(lines, volume);
 }

 int main(int argc, char **argv) {
+#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  std::cout << "# FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN is enabled"
+            << std::endl;
+#endif
+#ifdef FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED
+  std::cout << "# FASTFLOAT_TABLE_HACK_CHAR_DIGIT_LUT_DISABLED is enabled"
+            << std::endl;
+#endif
+#ifdef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
+  std::cout << "# FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED is enabled"
+            << std::endl;
+#endif
+#ifdef FASTFLOAT_ISNOT_CHECKED_BOUNDS
+  std::cout << "# FASTFLOAT_ISNOT_CHECKED_BOUNDS is enabled" << std::endl;
+#endif
+#ifdef USING_COUNTERS
  if (collector.has_events()) {
    std::cout << "# Using hardware counters" << std::endl;
  } else {
@ -236,11 +253,14 @@ int main(int argc, char **argv) {
              << std::endl;
 #endif
  }
+#endif
  if (argc > 1) {
    fileload(argv[1]);
    return EXIT_SUCCESS;
  }
+
  fileload(std::string(BENCHMARK_DATA_DIR) + "/canada.txt");
+  fileload(std::string(BENCHMARK_DATA_DIR) + "/canada_short.txt");
  fileload(std::string(BENCHMARK_DATA_DIR) + "/mesh.txt");
  return EXIT_SUCCESS;
 }
--- a/benchmarks/event_counter.h
+++ b/benchmarks/event_counter.h
@ -0,0 +1,182 @@
+#ifndef __EVENT_COUNTER_H
+#define __EVENT_COUNTER_H
+
+#include <cctype>
+#ifndef _MSC_VER
+#include <dirent.h>
+#endif
+#include <cinttypes>
+
+#include <cstring>
+
+#include <chrono>
+#include <array>
+
+#include "linux-perf-events.h"
+#ifdef __linux__
+#include <libgen.h>
+#endif
+
+#if (defined(__APPLE__) && __APPLE__) && (defined(__aarch64__) && __aarch64__)
+#include "apple_arm_events.h"
+#endif
+
+struct event_count {
+  // The types of counters (so we can read the getter more easily)
+  enum event_counter_types {
+    CPU_CYCLES = 0,
+    INSTRUCTIONS = 1,
+    BRANCHES = 2,
+    MISSED_BRANCHES = 3,
+    event_counter_types_size = 4
+  };
+
+  std::chrono::duration<double> elapsed;
+  std::array<unsigned long long, event_counter_types_size> event_counts;
+
+  event_count() : elapsed(0), event_counts{0, 0, 0, 0} {}
+
+  event_count(const std::chrono::duration<double> &_elapsed,
+              const std::array<unsigned long long, event_counter_types_size>
+                  &_event_counts)
+      : elapsed(_elapsed), event_counts(_event_counts) {}
+
+  event_count(const event_count &other)
+      : elapsed(other.elapsed), event_counts(other.event_counts) {}
+
+  double elapsed_sec() const {
+    return std::chrono::duration<double>(elapsed).count();
+  }
+
+  double elapsed_ns() const {
+    return std::chrono::duration<double, std::nano>(elapsed).count();
+  }
+
+  double cycles() const {
+    return static_cast<double>(event_counts[CPU_CYCLES]);
+  }
+
+  double instructions() const {
+    return static_cast<double>(event_counts[INSTRUCTIONS]);
+  }
+
+  double branches() const {
+    return static_cast<double>(event_counts[BRANCHES]);
+  }
+
+  double missed_branches() const {
+    return static_cast<double>(event_counts[MISSED_BRANCHES]);
+  }
+
+  event_count &operator=(const event_count &other) {
+    this->elapsed = other.elapsed;
+    this->event_counts = other.event_counts;
+    return *this;
+  }
+
+  event_count operator+(const event_count &other) const {
+    return event_count(elapsed + other.elapsed,
+                       {
+                           event_counts[0] + other.event_counts[0],
+                           event_counts[1] + other.event_counts[1],
+                           event_counts[2] + other.event_counts[2],
+                           event_counts[3] + other.event_counts[3],
+                       });
+  }
+
+  void operator+=(const event_count &other) { *this = *this + other; }
+};
+
+struct event_aggregate {
+  bool has_events = false;
+  int iterations = 0;
+  event_count total{};
+  event_count best{};
+  event_count worst{};
+
+  event_aggregate() = default;
+
+  void operator<<(const event_count &other) {
+    if (iterations == 0 || other.elapsed < best.elapsed) {
+      best = other;
+    }
+    if (iterations == 0 || other.elapsed > worst.elapsed) {
+      worst = other;
+    }
+    iterations++;
+    total += other;
+  }
+
+  double elapsed_sec() const { return total.elapsed_sec() / iterations; }
+
+  double elapsed_ns() const { return total.elapsed_ns() / iterations; }
+
+  double cycles() const { return total.cycles() / iterations; }
+
+  double instructions() const { return total.instructions() / iterations; }
+
+  double branches() const { return total.branches() / iterations; }
+
+  double missed_branches() const {
+    return total.missed_branches() / iterations;
+  }
+};
+
+struct event_collector {
+  event_count count{};
+  std::chrono::time_point<std::chrono::steady_clock> start_clock{};
+
+#if defined(__linux__)
+  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
+
+  event_collector()
+      : linux_events(std::array<unsigned long long,
+                                4 /*event_counter_types_size*/>{
+            PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS,
+            PERF_COUNT_HW_BRANCH_INSTRUCTIONS, // Retired branch instructions
+            PERF_COUNT_HW_BRANCH_MISSES}) {}
+
+  bool has_events() { return linux_events.is_working(); }
+#elif __APPLE__ && __aarch64__
+  performance_counters diff;
+
+  event_collector() : diff(0) { setup_performance_counters(); }
+
+  bool has_events() { return setup_performance_counters(); }
+#else
+  event_collector() = default;
+
+  bool has_events() { return false; }
+#endif
+
+  inline void start() {
+#if defined(__linux)
+    linux_events.start();
+#elif __APPLE__ && __aarch64__
+    if (has_events()) {
+      diff = get_counters();
+    }
+#endif
+    start_clock = std::chrono::steady_clock::now();
+  }
+
+  inline event_count &end() {
+    const auto end_clock = std::chrono::steady_clock::now();
+#if defined(__linux)
+    linux_events.end(count.event_counts);
+#elif __APPLE__ && __aarch64__
+    if (has_events()) {
+      performance_counters end = get_counters();
+      diff = end - diff;
+    }
+    count.event_counts[0] = diff.cycles;
+    count.event_counts[1] = diff.instructions;
+    count.event_counts[2] = diff.branches;
+    count.event_counts[3] = diff.missed_branches;
+#endif
+    count.elapsed = end_clock - start_clock;
+    return count;
+  }
+};
+
+#endif
--- a/benchmarks/linux-perf-events.h
+++ b/benchmarks/linux-perf-events.h
@ -0,0 +1,105 @@
+#pragma once
+#ifdef __linux__
+
+#include <asm/unistd.h>       // for __NR_perf_event_open
+#include <linux/perf_event.h> // for perf event constants
+#include <sys/ioctl.h>        // for ioctl
+#include <unistd.h>           // for syscall
+
+#include <cerrno>  // for errno
+#include <cstring> // for memset
+#include <stdexcept>
+
+#include <array>
+#include <vector>
+
+template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
+  int fd;
+  bool working;
+  perf_event_attr attribs{};
+  size_t num_events{};
+  std::vector<uint64_t> temp_result_vec{};
+  std::vector<uint64_t> ids{};
+
+public:
+  explicit LinuxEvents(std::array<unsigned long long, 4> config_vec)
+      : fd(0), working(true) {
+    memset(&attribs, 0, sizeof(attribs));
+    attribs.type = TYPE;
+    attribs.size = sizeof(attribs);
+    attribs.disabled = 1;
+    attribs.exclude_kernel = 1;
+    attribs.exclude_hv = 1;
+
+    attribs.sample_period = 0;
+    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
+    const int pid = 0;  // the current process
+    const int cpu = -1; // all CPUs
+    const unsigned long flags = 0;
+
+    int group = -1; // no group
+    num_events = config_vec.size();
+    ids.resize(config_vec.size());
+    uint32_t i = 0;
+    for (auto config : config_vec) {
+      attribs.config = config;
+      int _fd = static_cast<int>(
+          syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags));
+      if (_fd == -1) {
+        report_error("perf_event_open");
+      }
+      ioctl(_fd, PERF_EVENT_IOC_ID, &ids[i++]);
+      if (group == -1) {
+        group = _fd;
+        fd = _fd;
+      }
+    }
+
+    temp_result_vec.resize(num_events * 2 + 1);
+  }
+
+  ~LinuxEvents() {
+    if (fd != -1) {
+      close(fd);
+    }
+  }
+
+  inline void start() {
+    if (fd != -1) {
+      if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+        report_error("ioctl(PERF_EVENT_IOC_RESET)");
+      }
+
+      if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+        report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
+      }
+    }
+  }
+
+  inline void end(std::array<unsigned long long, 4> &results) {
+    if (fd != -1) {
+      if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
+        report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
+      }
+
+      if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
+        report_error("read");
+      }
+    }
+    // our actual results are in slots 1,3,5, ... of this structure
+    for (uint32_t i = 1; i < temp_result_vec.size(); i += 2) {
+      results[i / 2] = temp_result_vec[i];
+    }
+    for (uint32_t i = 2; i < temp_result_vec.size(); i += 2) {
+      if (ids[i / 2 - 1] != temp_result_vec[i]) {
+        report_error("event mismatch");
+      }
+    }
+  }
+
+  bool is_working() { return working; }
+
+private:
+  void report_error(const std::string &) { working = false; }
+};
+#endif
--- a/include/fast_float/ascii_number.h
+++ b/include/fast_float/ascii_number.h
@ -10,17 +10,16 @@

 #include "float_common.h"

-#ifdef FASTFLOAT_SSE2
+#if defined(FASTFLOAT_SSE2)
 #include <emmintrin.h>
-#endif
-
-#ifdef FASTFLOAT_NEON
+#elif defined(FASTFLOAT_NEON)
 #include <arm_neon.h>
 #endif

 namespace fast_float {

-template <typename UC> fastfloat_really_inline constexpr bool has_simd_opt() {
+template <typename UC>
+fastfloat_really_inline constexpr bool has_simd_opt() noexcept {
 #ifdef FASTFLOAT_HAS_SIMD
  return std::is_same<UC, char16_t>::value;
 #else
@ -35,32 +34,32 @@ fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
  return (unsigned)(c - UC('0')) <= 9u;
 }

-fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
+fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) noexcept {
  return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 |
         (val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 |
         (val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 |
         (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56;
 }

-fastfloat_really_inline constexpr uint32_t byteswap_32(uint32_t val) {
+fastfloat_really_inline constexpr uint32_t byteswap(uint32_t val) noexcept {
  return (val >> 24) | ((val >> 8) & 0x0000FF00u) | ((val << 8) & 0x00FF0000u) |
         (val << 24);
 }

-// Read 8 UC into a u64. Truncates UC if not char.
-template <typename UC>
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
-read8_to_u64(UC const *chars) {
+// Read UCs into an unsigned integer. Truncates UC if not char.
+template <typename T, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 T
+read_chars_to_unsigned(UC const *chars) noexcept {
  if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
-    uint64_t val = 0;
-    for (int i = 0; i < 8; ++i) {
-      val |= uint64_t(uint8_t(*chars)) << (i * 8);
+    T val = 0;
+    for (uint_fast8_t i = 0; i != sizeof(T); ++i) {
+      val |= T(uint8_t(*chars)) << (i * 8);
      ++chars;
    }
    return val;
  }
-  uint64_t val;
-  ::memcpy(&val, chars, sizeof(uint64_t));
+  T val;
+  ::memcpy(&val, chars, sizeof(T));
 #if FASTFLOAT_IS_BIG_ENDIAN == 1
  // Need to read as-if the number was in little-endian order.
  val = byteswap(val);
@ -68,39 +67,19 @@ read8_to_u64(UC const *chars) {
  return val;
 }

-// Read 4 UC into a u32. Truncates UC if not char.
-template <typename UC>
-fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
-read4_to_u32(UC const *chars) {
-  if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
-    uint32_t val = 0;
-    for (int i = 0; i < 4; ++i) {
-      val |= uint32_t(uint8_t(*chars)) << (i * 8);
-      ++chars;
-    }
-    return val;
-  }
-  uint32_t val;
-  ::memcpy(&val, chars, sizeof(uint32_t));
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-  val = byteswap_32(val);
-#endif
-  return val;
-}
 #ifdef FASTFLOAT_SSE2

-fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const data) {
-  FASTFLOAT_SIMD_DISABLE_WARNINGS
+fastfloat_really_inline uint64_t simd_read8_to_u64(__m128i const &data) {
+  // _mm_packus_epi16 (SSE2) narrows 8×i16 → 8×u8 with unsigned saturation
  __m128i const packed = _mm_packus_epi16(data, data);
 #ifdef FASTFLOAT_64BIT
-  return uint64_t(_mm_cvtsi128_si64(packed));
+  return static_cast<uint64_t>(_mm_cvtsi128_si64(packed));
 #else
  uint64_t value;
  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
  _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed);
  return value;
 #endif
-  FASTFLOAT_SIMD_RESTORE_WARNINGS
 }

 fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
@ -112,11 +91,9 @@ fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {

 #elif defined(FASTFLOAT_NEON)

-fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const data) {
-  FASTFLOAT_SIMD_DISABLE_WARNINGS
+fastfloat_really_inline uint64_t simd_read8_to_u64(uint16x8_t const &data) {
  uint8x8_t utf8_packed = vmovn_u16(data);
  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0);
-  FASTFLOAT_SIMD_RESTORE_WARNINGS
 }

 fastfloat_really_inline uint64_t simd_read8_to_u64(char16_t const *chars) {
@ -141,7 +118,7 @@ uint64_t simd_read8_to_u64(UC const *) {

 // credit  @aqrit
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t
-parse_eight_digits_unrolled(uint64_t val) {
+parse_eight_digits_unrolled(uint64_t val) noexcept {
  uint64_t const mask = 0x000000FF000000FF;
  uint64_t const mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
  uint64_t const mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
@ -156,7 +133,8 @@ template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t
 parse_eight_digits_unrolled(UC const *chars) noexcept {
  if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
-    return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
+    return parse_eight_digits_unrolled(
+        read_chars_to_unsigned<uint64_t>(chars)); // truncation okay
  }
  return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
 }
@ -193,23 +171,27 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
  }
 #ifdef FASTFLOAT_SSE2
  FASTFLOAT_SIMD_DISABLE_WARNINGS
+  // Load 8 UTF-16 characters (16 bytes)
  __m128i const data =
      _mm_loadu_si128(reinterpret_cast<__m128i const *>(chars));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS

-  // (x - '0') <= 9
+  // Branchless "are all digits?" trick from Lemire:
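+  // adding 32720 (= 0x8000 - '0') maps '0'..'9' onto int16 -32768..-32759, so
+  // unsigned (x - '0') <= 9  <=>  signed int16(x + 32720) <= -32759;
+  // the signed compare below therefore flags a lane exactly when it is NOT a digit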
  // http://0x80.pl/articles/simd-parsing-int-sequences.html
  __m128i const t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
-  __m128i const t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
+  __m128i const mask = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));

-  if (_mm_movemask_epi8(t1) == 0) {
+  // If mask == 0 → all digits valid.
+  if (_mm_movemask_epi8(mask) == 0) {
    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
    return true;
-  } else
-    return false;
-  FASTFLOAT_SIMD_RESTORE_WARNINGS
+  }
 #elif defined(FASTFLOAT_NEON)
  FASTFLOAT_SIMD_DISABLE_WARNINGS
  uint16x8_t const data = vld1q_u16(reinterpret_cast<uint16_t const *>(chars));
+  FASTFLOAT_SIMD_RESTORE_WARNINGS

  // (x - '0') <= 9
  // http://0x80.pl/articles/simd-parsing-int-sequences.html
@ -219,14 +201,12 @@ simd_parse_if_eight_digits_unrolled(char16_t const *chars,
  if (vminvq_u16(mask) == 0xFFFF) {
    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
    return true;
-  } else
-    return false;
-  FASTFLOAT_SIMD_RESTORE_WARNINGS
+  }
 #else
  (void)chars;
  (void)i;
-  return false;
 #endif // FASTFLOAT_SSE2
+  return false;
 }

 #endif // FASTFLOAT_HAS_SIMD
@ -260,20 +240,25 @@ loop_parse_if_eight_digits(char const *&p, char const *const pend,
                           uint64_t &i) {
  // optimizes better than parse_if_eight_digits_unrolled() for UC = char.
  while ((std::distance(p, pend) >= 8) &&
-         is_made_of_eight_digits_fast(read8_to_u64(p))) {
+         is_made_of_eight_digits_fast(read_chars_to_unsigned<uint64_t>(p))) {
    i = i * 100000000 +
-        parse_eight_digits_unrolled(read8_to_u64(
+        parse_eight_digits_unrolled(read_chars_to_unsigned<uint64_t>(
            p)); // in rare cases, this will overflow, but that's ok
    p += 8;
  }
 }

-enum class parse_error {
+enum class parse_error : uint_fast8_t {
  no_error,
-  // [JSON-only] The minus sign must be followed by an integer.
-  missing_integer_after_sign,
  // A sign must be followed by an integer or dot.
  missing_integer_or_dot_after_sign,
+  // The mantissa must have at least one digit.
+  no_digits_in_mantissa,
+  // Scientific notation requires an exponential part.
+  missing_exponential_part,
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  // [JSON-only] The minus sign must be followed by an integer.
+  missing_integer_after_sign,
  // [JSON-only] The integer part must not have leading zeros.
  leading_zeros_in_integer_part,
  // [JSON-only] The integer part must have at least one digit.
@ -281,23 +266,25 @@ enum class parse_error {
  // [JSON-only] If there is a decimal point, there must be digits in the
  // fractional part.
  no_digits_in_fractional_part,
-  // The mantissa must have at least one digit.
-  no_digits_in_mantissa,
-  // Scientific notation requires an exponential part.
-  missing_exponential_part,
+#endif
 };

 template <typename UC> struct parsed_number_string_t {
-  int64_t exponent{0};
-  uint64_t mantissa{0};
-  UC const *lastmatch{nullptr};
-  bool negative{false};
-  bool valid{false};
-  bool too_many_digits{false};
+  am_mant_t mantissa;
+
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  bool negative;
+#endif
+  bool invalid;
+  bool too_many_digits;
+  parse_error error;
+
+  am_pow_t exponent;
+
  // contains the range of the significant digits
-  span<UC const> integer{};  // non-nullable
-  span<UC const> fraction{}; // nullable
-  parse_error error{parse_error::no_error};
+  span<UC const> integer;  // non-nullable
+  span<UC const> fraction; // nullable
+  UC const *lastmatch;
 };

 using byte_span = span<char const>;
@ -305,9 +292,9 @@ using parsed_number_string = parsed_number_string_t<char>;

 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
-report_parse_error(UC const *p, parse_error error) {
-  parsed_number_string_t<UC> answer;
-  answer.valid = false;
+report_parse_error(parsed_number_string_t<UC> &answer, UC const *p,
+                   parse_error error) noexcept {
+  answer.invalid = true;
  answer.lastmatch = p;
  answer.error = error;
  return answer;
@ -318,125 +305,154 @@ report_parse_error(UC const *p, parse_error error) {
 template <bool basic_json_fmt, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
 parse_number_string(UC const *p, UC const *pend,
-                    parse_options_t<UC> options) noexcept {
-  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
-  UC const decimal_point = options.decimal_point;
-
-  parsed_number_string_t<UC> answer;
-  answer.valid = false;
-  answer.too_many_digits = false;
-  // assume p < pend, so dereference without checks;
+                    parse_options_t<UC> const options) noexcept {
+  parsed_number_string_t<UC> answer{};
+  // so dereference without checks
+  FASTFLOAT_ASSUME(p < pend);
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
   answer.negative = (*p == UC('-'));
   // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
-  if ((*p == UC('-')) || (uint64_t(fmt & chars_format::allow_leading_plus) &&
-                          !basic_json_fmt && *p == UC('+'))) {
+  if (answer.negative ||
+      ((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
+       (!basic_json_fmt && *p == UC('+')))) {
     ++p;
     if (p == pend) {
       return report_parse_error<UC>(
-          p, parse_error::missing_integer_or_dot_after_sign);
+          answer, p, parse_error::missing_integer_or_dot_after_sign);
     }
     FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
-      if (!is_integer(*p)) { // a sign must be followed by an integer
-        return report_parse_error<UC>(p,
+      // a sign must be followed by an integer
+      if (!is_integer(*p)) {
+        return report_parse_error<UC>(answer, p,
                                       parse_error::missing_integer_after_sign);
       }
     }
     else {
-      if (!is_integer(*p) &&
-          (*p !=
-           decimal_point)) { // a sign must be followed by an integer or the dot
+      // a sign must be followed by an integer or the dot
+      if (!is_integer(*p) && (*p != options.decimal_point)) {
         return report_parse_error<UC>(
-            p, parse_error::missing_integer_or_dot_after_sign);
+            answer, p, parse_error::missing_integer_or_dot_after_sign);
       }
     }
   }
-  UC const *const start_digits = p;
+#endif

-  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
+  auto const *const start_digits = p;

   while ((p != pend) && is_integer(*p)) {
     // a multiplication by 10 is cheaper than an arbitrary integer
     // multiplication
-    i = 10 * i +
-        uint64_t(*p -
-                 UC('0')); // might overflow, we will handle the overflow later
+    answer.mantissa = static_cast<fast_float::am_mant_t>(
+        answer.mantissa * 10 +
+        static_cast<uint8_t>(
+            *p - UC('0'))); // might overflow, we will handle the overflow later
     ++p;
   }
-  UC const *const end_of_integer_part = p;
-  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
-  answer.integer = span<UC const>(start_digits, size_t(digit_count));
+
+  auto const *const end_of_integer_part = p;
+  auto digit_count = static_cast<am_digits>(end_of_integer_part - start_digits);
+  answer.integer = span<UC const>(start_digits, digit_count);
+  // We have now parsed the integer part of the mantissa.
+
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
   FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
     // at least 1 digit in integer part, without leading zeros
     if (digit_count == 0) {
-      return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
+      return report_parse_error<UC>(answer, p,
+                                    parse_error::no_digits_in_integer_part);
     }
     if ((start_digits[0] == UC('0') && digit_count > 1)) {
-      return report_parse_error<UC>(start_digits,
+      return report_parse_error<UC>(answer, start_digits,
                                     parse_error::leading_zeros_in_integer_part);
     }
   }
+#endif

-  int64_t exponent = 0;
-  bool const has_decimal_point = (p != pend) && (*p == decimal_point);
-  if (has_decimal_point) {
+  // We can now parse the fraction part of the mantissa.
+  if ((p != pend) && (*p == options.decimal_point)) {
     ++p;
-    UC const *before = p;
+    auto const *const before = p;
     // can occur at most twice without overflowing, but let it occur more, since
     // for integers with many digits, digit parsing is the primary bottleneck.
-    loop_parse_if_eight_digits(p, pend, i);
+    loop_parse_if_eight_digits(p, pend, answer.mantissa);

     while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - UC('0'));
-      ++p;
-      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
-    }
-    exponent = before - p;
-    answer.fraction = span<UC const>(before, size_t(p - before));
-    digit_count -= exponent;
-  }
-  FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
-    // at least 1 digit in fractional part
-    if (has_decimal_point && exponent == 0) {
-      return report_parse_error<UC>(p,
-                                    parse_error::no_digits_in_fractional_part);
-    }
-  }
-  else if (digit_count == 0) { // we must have encountered at least one integer!
-    return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
-  }
-  int64_t exp_number = 0; // explicit exponential part
-  if ((uint64_t(fmt & chars_format::scientific) && (p != pend) &&
-       ((UC('e') == *p) || (UC('E') == *p))) ||
-      (uint64_t(fmt & detail::basic_fortran_fmt) && (p != pend) &&
-       ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
-        (UC('D') == *p)))) {
-    UC const *location_of_e = p;
-    if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) ||
-        (UC('D') == *p)) {
+      auto const digit = uint8_t(*p - UC('0'));
+      answer.mantissa = static_cast<fast_float::am_mant_t>(
+          answer.mantissa * 10 +
+          digit); // in rare cases, this will overflow, but that's ok
       ++p;
     }
+    answer.exponent = static_cast<am_pow_t>(before - p);
+    answer.fraction =
+        span<UC const>(before, static_cast<am_digits>(p - before));
+    digit_count -= static_cast<am_digits>(answer.exponent);
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+    FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
+      // at least 1 digit in fractional part
+      if (answer.exponent == 0) {
+        return report_parse_error<UC>(
+            answer, p, parse_error::no_digits_in_fractional_part);
+      }
+    }
+#endif
+  }
+  if (digit_count == 0) { // no digit at all; also rejects a lone "."
+    return report_parse_error<UC>(answer, p,
+                                  parse_error::no_digits_in_mantissa);
+  }
+  // We have now parsed the integer and the fraction part of the mantissa.
+
+  // Now we can parse the explicit exponential part.
+  am_pow_t exp_number = 0; // explicit exponential part
+  if ((p != pend) &&
+      ((chars_format_t(options.format & chars_format::scientific) &&
+        (UC('e') == *p || UC('E') == *p))
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+       || (chars_format_t(options.format & detail::basic_fortran_fmt) &&
+           ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) ||
+            (UC('D') == *p)))
+#endif
+           )) {
+    auto const *location_of_e = p;
+#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+    ++p;
+#else
+    if ((UC('e') == *p) || (UC('E') == *p)
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+        || (UC('d') == *p) || (UC('D') == *p)
+#endif
+    ) {
+      ++p;
+    }
+#endif
     bool neg_exp = false;
-    if ((p != pend) && (UC('-') == *p)) {
-      neg_exp = true;
-      ++p;
-    } else if ((p != pend) &&
-               (UC('+') ==
-                *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
-      ++p;
+    if (p != pend) {
+      if (UC('-') == *p) {
+        neg_exp = true;
+        ++p;
+      } else if (UC('+') == *p) {
+        // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+        ++p;
+      }
     }
+    // We have now parsed the sign of the exponent.
     if ((p == pend) || !is_integer(*p)) {
-      if (!uint64_t(fmt & chars_format::fixed)) {
-        // The exponential part is invalid for scientific notation, so it must
-        // be a trailing token for fixed notation. However, fixed notation is
-        // disabled, so report a scientific notation error.
-        return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+      if (!(chars_format_t(options.format & chars_format::fixed))) {
+        // The exponential part is invalid for scientific notation, so it
+        // must be a trailing token for fixed notation. However, fixed
+        // notation is disabled, so report a scientific notation error.
+        return report_parse_error<UC>(answer, p,
+                                      parse_error::missing_exponential_part);
       }
       // Otherwise, we will be ignoring the 'e'.
       p = location_of_e;
     } else {
+      // Now let's parse the explicit exponent.
       while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - UC('0'));
-        if (exp_number < 0x10000000) {
+        if (exp_number < am_bias_limit) {
+          // check for exponent overflow if we have too many digits.
+          auto const digit = uint8_t(*p - UC('0'));
           exp_number = 10 * exp_number + digit;
         }
         ++p;
@ -444,17 +460,21 @@ parse_number_string(UC const *p, UC const *pend,
       if (neg_exp) {
         exp_number = -exp_number;
       }
-      exponent += exp_number;
+      answer.exponent += exp_number;
     }
   } else {
     // If it scientific and not fixed, we have to bail out.
-    if (uint64_t(fmt & chars_format::scientific) &&
-        !uint64_t(fmt & chars_format::fixed)) {
-      return report_parse_error<UC>(p, parse_error::missing_exponential_part);
+    if ((chars_format_t(options.format & chars_format::scientific)) &&
+        !(chars_format_t(options.format & chars_format::fixed))) {
+      return report_parse_error<UC>(answer, p,
+                                    parse_error::missing_exponential_part);
     }
   }
+
+  // We parsed all parts of the number, let's save progress.
   answer.lastmatch = p;
-  answer.valid = true;
+
+  // Now we can check for errors.

   // If we frequently had to deal with long strings of digits,
   // we could extend our code by using a 128-bit integer instead
@ -466,58 +486,64 @@ parse_number_string(UC const *p, UC const *pend,
     // We have to handle the case where we have 0.0000somenumber.
     // We need to be mindful of the case where we only have zeroes...
     // E.g., 0.000000000...000.
-    UC const *start = start_digits;
-    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+    auto const *start = start_digits;
+    while ((start != pend) &&
+           (*start == UC('0') || *start == options.decimal_point)) {
       if (*start == UC('0')) {
-        digit_count--;
+        --digit_count;
       }
-      start++;
+      ++start;
     }

+    // We have to check if number has more than 19 significant digits.
     if (digit_count > 19) {
       answer.too_many_digits = true;
       // Let us start again, this time, avoiding overflows.
       // We don't need to call if is_integer, since we use the
       // pre-tokenized spans from above.
-      i = 0;
+      answer.mantissa = 0;
       p = answer.integer.ptr;
       UC const *int_end = p + answer.integer.len();
-      uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
-      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - UC('0'));
+      constexpr am_mant_t minimal_nineteen_digit_integer{1000000000000000000};
+      while ((p != int_end) &&
+             (answer.mantissa < minimal_nineteen_digit_integer)) {
+        answer.mantissa =
+            answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0'));
         ++p;
       }
-      if (i >= minimal_nineteen_digit_integer) { // We have a big integer
-        exponent = end_of_integer_part - p + exp_number;
-      } else { // We have a value with a fractional component.
+      if (answer.mantissa >= minimal_nineteen_digit_integer) {
+        // We have a big integer, so skip the fraction part completely.
+        answer.exponent = am_pow_t(end_of_integer_part - p) + exp_number;
+      } else {
+        // We have a value with a significant fractional component.
         p = answer.fraction.ptr;
-        UC const *frac_end = p + answer.fraction.len();
-        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-          i = i * 10 + uint64_t(*p - UC('0'));
+        UC const *const frac_end = p + answer.fraction.len();
+        while ((p != frac_end) &&
+               (answer.mantissa < minimal_nineteen_digit_integer)) {
+          answer.mantissa = static_cast<am_mant_t>(
+              answer.mantissa * 10 + static_cast<am_mant_t>(*p - UC('0')));
           ++p;
         }
-        exponent = answer.fraction.ptr - p + exp_number;
+        answer.exponent = am_pow_t(answer.fraction.ptr - p) + exp_number;
       }
-      // We have now corrected both exponent and i, to a truncated value
+      // We have now corrected both exponent and mantissa, to a truncated value
     }
   }
-  answer.exponent = exponent;
-  answer.mantissa = i;
+
   return answer;
  }

 template <typename T, typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 parse_int_string(UC const *p, UC const *pend, T &value,
-                 parse_options_t<UC> options) {
-  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
-  int const base = options.base;
-
+                 parse_options_t<UC> const options) noexcept {
  from_chars_result_t<UC> answer;

-  UC const *const first = p;
+  auto const *const first = p;

-  bool const negative = (*p == UC('-'));
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  // Read sign
+  auto const negative = (*p == UC('-'));
 #ifdef FASTFLOAT_VISUAL_STUDIO
 #pragma warning(push)
 #pragma warning(disable : 4127)
@ -530,24 +556,27 @@ parse_int_string(UC const *p, UC const *pend, T &value,
    answer.ptr = first;
    return answer;
  }
-  if ((*p == UC('-')) ||
-      (uint64_t(fmt & chars_format::allow_leading_plus) && (*p == UC('+')))) {
+  if (negative ||
+      ((chars_format_t(options.format & chars_format::allow_leading_plus)) &&
+       (*p == UC('+')))) {
    ++p;
  }
+#endif

-  UC const *const start_num = p;
+  auto const *const start_num = p;

+  // Skip leading zeros
  while (p != pend && *p == UC('0')) {
    ++p;
  }

-  bool const has_leading_zeros = p > start_num;
+  auto const has_leading_zeros = p > start_num;

-  UC const *const start_digits = p;
+  auto const *const start_digits = p;

  FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value)) {
-    if (base == 10) {
-      const size_t len = (size_t)(pend - p);
+    if (options.base == 10) {
+      auto const len = static_cast<am_digits>(pend - p);
      if (len == 0) {
        if (has_leading_zeros) {
          value = 0;
@ -562,53 +591,39 @@ parse_int_string(UC const *p, UC const *pend, T &value,

      uint32_t digits;

-#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST
-      if (std::is_constant_evaluated()) {
-        uint8_t str[4]{};
-        for (size_t j = 0; j < 4 && j < len; ++j) {
-          str[j] = static_cast<uint8_t>(p[j]);
-        }
-        digits = std::bit_cast<uint32_t>(str);
-#if FASTFLOAT_IS_BIG_ENDIAN
-        digits = byteswap_32(digits);
-#endif
-      }
-#else
-      if (false) {
-      }
-#endif
-      else if (len >= 4) {
-        ::memcpy(&digits, p, 4);
-#if FASTFLOAT_IS_BIG_ENDIAN
-        digits = byteswap_32(digits);
-#endif
+      if (len >= sizeof(uint32_t)) {
+        digits = read_chars_to_unsigned<uint32_t>(p);
      } else {
-        uint32_t b0 = static_cast<uint8_t>(p[0]);
-        uint32_t b1 = (len > 1) ? static_cast<uint8_t>(p[1]) : 0xFFu;
-        uint32_t b2 = (len > 2) ? static_cast<uint8_t>(p[2]) : 0xFFu;
-        uint32_t b3 = 0xFFu;
+        uint32_t const b0 = static_cast<uint8_t>(p[0]);
+        uint32_t const b1 = (len > 1) ? static_cast<uint8_t>(p[1]) : 0x00u;
+        uint32_t const b2 = (len > 2) ? static_cast<uint8_t>(p[2]) : 0x00u;
+        uint32_t const b3 = 0x00u;
        digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
      }
+#if FASTFLOAT_IS_BIG_ENDIAN
+      digits = byteswap(digits);
+#endif

-      uint32_t magic =
+      uint32_t const magic =
          ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
-      uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32
-      uint32_t nd = (tz == 32) ? 4 : (tz >> 3);
-      nd = (uint32_t)std::min((size_t)nd, len);
+      auto const tz = countr_zero_32(magic); // 7, 15, 23, 31, or 32
+      auto nd = static_cast<am_digits>(tz >> 3);
+      nd = std::min(nd, len);
      if (nd == 0) {
        if (has_leading_zeros) {
          value = 0;
          answer.ec = std::errc();
          answer.ptr = p;
          return answer;
+        } else {
+          answer.ec = std::errc::invalid_argument;
+          answer.ptr = first;
        }
-        answer.ec = std::errc::invalid_argument;
-        answer.ptr = first;
        return answer;
      }
      if (nd > 3) {
        const UC *q = p + nd;
-        size_t rem = len - nd;
+        auto rem = len - nd;
        while (rem) {
          if (*q < UC('0') || *q > UC('9'))
            break;
@ -623,14 +638,15 @@ parse_int_string(UC const *p, UC const *pend, T &value,
      digits ^= 0x30303030u;
      digits <<= ((4 - nd) * 8);

-      uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) |
-                       ((digits << 8) & 0xff0000);
+      uint32_t const check = ((digits >> 24) & 0xff) |
+                             ((digits >> 8) & 0xff00) |
+                             ((digits << 8) & 0xff0000);
      if (check > 0x00020505) {
        answer.ec = std::errc::result_out_of_range;
        answer.ptr = p + nd;
        return answer;
      }
-      value = (uint8_t)((0x640a01 * digits) >> 24);
+      value = static_cast<uint8_t>((0x640a01 * digits) >> 24);
      answer.ec = std::errc();
      answer.ptr = p + nd;
      return answer;
@ -638,8 +654,8 @@ parse_int_string(UC const *p, UC const *pend, T &value,
  }

  FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value)) {
-    if (base == 10) {
-      const size_t len = size_t(pend - p);
+    if (options.base == 10) {
+      const auto len = static_cast<am_digits>(pend - p);
      if (len == 0) {
        if (has_leading_zeros) {
          value = 0;
@ -652,22 +668,22 @@ parse_int_string(UC const *p, UC const *pend, T &value,
        return answer;
      }

-      if (len >= 4) {
-        uint32_t digits = read4_to_u32(p);
+      if (len >= sizeof(uint32_t)) {
+        auto const digits = read_chars_to_unsigned<uint32_t>(p);
        if (is_made_of_four_digits_fast(digits)) {
-          uint32_t v = parse_four_digits_unrolled(digits);
+          auto v = parse_four_digits_unrolled(digits);
          if (len >= 5 && is_integer(p[4])) {
-            v = v * 10 + uint32_t(p[4] - '0');
+            v = v * 10 + static_cast<uint32_t>(p[4] - '0');
            if (len >= 6 && is_integer(p[5])) {
              answer.ec = std::errc::result_out_of_range;
-              const UC *q = p + 5;
+              const auto *q = p + 5;
              while (q != pend && is_integer(*q)) {
-                q++;
+                ++q;
              }
              answer.ptr = q;
              return answer;
            }
-            if (v > 65535) {
+            if (v > std::numeric_limits<uint16_t>::max()) {
              answer.ec = std::errc::result_out_of_range;
              answer.ptr = p + 5;
              return answer;
@ -687,20 +703,21 @@ parse_int_string(UC const *p, UC const *pend, T &value,
    }
  }

-  uint64_t i = 0;
-  if (base == 10) {
+  // Parse digits
+  am_mant_t i = 0;
+  if (options.base == 10) {
    loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
  }
  while (p != pend) {
-    uint8_t digit = ch_to_digit(*p);
-    if (digit >= base) {
+    auto const digit = ch_to_digit(*p);
+    if (digit >= options.base) {
      break;
    }
-    i = uint64_t(base) * i + digit; // might overflow, check this later
-    p++;
+    i = am_mant_t(options.base) * i + digit; // might overflow, check this later
+    ++p;
  }

-  size_t digit_count = size_t(p - start_digits);
+  auto const digit_count = static_cast<am_digits>(p - start_digits);

  if (digit_count == 0) {
    if (has_leading_zeros) {
@ -717,30 +734,38 @@ parse_int_string(UC const *p, UC const *pend, T &value,
  answer.ptr = p;

  // check u64 overflow
-  size_t max_digits = max_digits_u64(base);
+  auto const max_digits = max_digits_u64(options.base);
  if (digit_count > max_digits) {
    answer.ec = std::errc::result_out_of_range;
    return answer;
  }
  // this check can be eliminated for all other types, but they will all require
  // a max_digits(base) equivalent
-  if (digit_count == max_digits && i < min_safe_u64(base)) {
+  if (digit_count == max_digits && i < min_safe_u64(options.base)) {
    answer.ec = std::errc::result_out_of_range;
    return answer;
  }

  // check other types overflow
-  if (!std::is_same<T, uint64_t>::value) {
-    if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) {
+  if (!std::is_same<T, am_mant_t>::value) {
+    if (i > am_mant_t(std::numeric_limits<T>::max())
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+                + uint8_t(negative)
+#endif
+    ) {
      answer.ec = std::errc::result_out_of_range;
      return answer;
    }
  }

+#ifdef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  value = T(i);
+#else
  if (negative) {
 #ifdef FASTFLOAT_VISUAL_STUDIO
 #pragma warning(push)
 #pragma warning(disable : 4146)
+#pragma warning(disable : 4804)
 #endif
    // this weird workaround is required because:
    // - converting unsigned to signed when its value is greater than signed max
@ -749,13 +774,14 @@ parse_int_string(UC const *p, UC const *pend, T &value,
    // this is always optimized into a neg instruction (note: T is an integer
    // type)
    value = T(-std::numeric_limits<T>::max() -
-              T(i - uint64_t(std::numeric_limits<T>::max())));
+              T(i - am_mant_t(std::numeric_limits<T>::max())));
 #ifdef FASTFLOAT_VISUAL_STUDIO
 #pragma warning(pop)
 #endif
  } else {
    value = T(i);
  }
+#endif

  answer.ec = std::errc();
  return answer;
--- a/include/fast_float/bigint.h
+++ b/include/fast_float/bigint.h
@ -19,11 +19,11 @@ namespace fast_float {
 #if defined(FASTFLOAT_64BIT) && !defined(__sparc)
 #define FASTFLOAT_64BIT_LIMB 1
 typedef uint64_t limb;
-constexpr size_t limb_bits = 64;
+constexpr limb_t limb_bits = 64;
 #else
 #define FASTFLOAT_32BIT_LIMB
 typedef uint32_t limb;
-constexpr size_t limb_bits = 32;
+constexpr limb_t limb_bits = 32;
 #endif

 typedef span<limb> limb_span;
@ -32,59 +32,58 @@ typedef span<limb> limb_span;
 // of bits required to store the largest bigint, which is
 // `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or
 // ~3600 bits, so we round to 4000.
-constexpr size_t bigint_bits = 4000;
-constexpr size_t bigint_limbs = bigint_bits / limb_bits;
+typedef uint_fast16_t bigint_bits_t;
+constexpr bigint_bits_t bigint_bits = 4000;
+constexpr limb_t bigint_limbs = bigint_bits / limb_bits;

 // vector-like type that is allocated on the stack. the entire
 // buffer is pre-allocated, and only the length changes.
-template <uint16_t size> struct stackvec {
+template <limb_t size> struct stackvec {
  limb data[size];
  // we never need more than 150 limbs
-  uint16_t length{0};
+  limb_t length{0};

-  stackvec() = default;
+  FASTFLOAT_CONSTEXPR20 stackvec() noexcept = default;
  stackvec(stackvec const &) = delete;
  stackvec &operator=(stackvec const &) = delete;
  stackvec(stackvec &&) = delete;
  stackvec &operator=(stackvec &&other) = delete;

  // create stack vector from existing limb span.
-  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
+  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) noexcept {
    FASTFLOAT_ASSERT(try_extend(s));
  }

-  FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept {
+  FASTFLOAT_CONSTEXPR14 limb &operator[](limb_t index) noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    return data[index];
  }

-  FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 const limb &operator[](limb_t index) const noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
    return data[index];
  }

  // index from the end of the container
-  FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 const limb &rindex(limb_t index) const noexcept {
    FASTFLOAT_DEBUG_ASSERT(index < length);
-    size_t rindex = length - index - 1;
+    auto rindex = length - index - 1;
    return data[rindex];
  }

  // set the length, without bounds checking.
-  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
-    length = uint16_t(len);
-  }
+  FASTFLOAT_CONSTEXPR14 void set_len(limb_t len) noexcept { length = len; }

-  constexpr size_t len() const noexcept { return length; }
+  constexpr limb_t len() const noexcept { return length; }

  constexpr bool is_empty() const noexcept { return length == 0; }

-  constexpr size_t capacity() const noexcept { return size; }
+  constexpr limb_t capacity() const noexcept { return size; }

  // append item to vector, without bounds checking
  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
    data[length] = value;
-    length++;
+    ++length;
  }

  // append item to vector, returning if item was added
@ -101,7 +100,7 @@ template <uint16_t size> struct stackvec {
  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
    limb *ptr = data + length;
    std::copy_n(s.ptr, s.len(), ptr);
-    set_len(len() + s.len());
+    set_len(len() + static_cast<limb_t>(s.len()));
  }

  // try to add items to the vector, returning if items were added
@ -118,9 +117,9 @@ template <uint16_t size> struct stackvec {
  // if the new size is longer than the vector, assign value to each
  // appended item.
  FASTFLOAT_CONSTEXPR20
-  void resize_unchecked(size_t new_len, limb value) noexcept {
+  void resize_unchecked(limb_t new_len, limb value) noexcept {
    if (new_len > len()) {
-      size_t count = new_len - len();
+      auto count = new_len - len();
      limb *first = data + len();
      limb *last = first + count;
      ::std::fill(first, last, value);
@ -131,7 +130,7 @@ template <uint16_t size> struct stackvec {
  }

  // try to resize the vector, returning if the vector was resized.
-  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool try_resize(limb_t new_len, limb value) noexcept {
    if (new_len > capacity()) {
      return false;
    } else {
@ -143,12 +142,12 @@ template <uint16_t size> struct stackvec {
  // check if any limbs are non-zero after the given index.
  // this needs to be done in reverse order, since the index
  // is relative to the most significant limbs.
-  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 bool nonzero(limb_t index) const noexcept {
    while (index < len()) {
      if (rindex(index) != 0) {
        return true;
      }
-      index++;
+      ++index;
    }
    return false;
  }
@ -156,7 +155,7 @@ template <uint16_t size> struct stackvec {
  // normalize the big integer, so most-significant zero limbs are removed.
  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
    while (len() > 0 && rindex(0) == 0) {
-      length--;
+      --length;
    }
  }
 };
@ -170,18 +169,18 @@ empty_hi64(bool &truncated) noexcept {
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
 uint64_hi64(uint64_t r0, bool &truncated) noexcept {
  truncated = false;
-  int shl = leading_zeroes(r0);
+  auto shl = leading_zeroes(r0);
  return r0 << shl;
 }

 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t
 uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept {
-  int shl = leading_zeroes(r0);
+  auto shl = leading_zeroes(r0);
  if (shl == 0) {
    truncated = r1 != 0;
    return r0;
  } else {
-    int shr = 64 - shl;
+    limb_t shr = 64 - shl;
    truncated = (r1 << shl) != 0;
    return (r0 << shl) | (r1 >> shr);
  }
@ -258,16 +257,14 @@ scalar_mul(limb x, limb y, limb &carry) noexcept {

 // add scalar value to bigint starting from offset.
 // used in grade school multiplication
-template <uint16_t size>
-inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
-                                                 size_t start) noexcept {
-  size_t index = start;
-  limb carry = y;
+template <limb_t size>
+inline FASTFLOAT_CONSTEXPR20 bool
+small_add_from(stackvec<size> &vec, limb carry, limb_t start) noexcept {
  bool overflow;
-  while (carry != 0 && index < vec.len()) {
-    vec[index] = scalar_add(vec[index], carry, overflow);
+  while (carry != 0 && start < vec.len()) {
+    vec[start] = scalar_add(vec[start], carry, overflow);
    carry = limb(overflow);
-    index += 1;
+    ++start;
  }
  if (carry != 0) {
    FASTFLOAT_TRY(vec.try_push(carry));
@ -276,18 +273,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec<size> &vec, limb y,
 }

 // add scalar value to bigint.
-template <uint16_t size>
+template <limb_t size>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
 small_add(stackvec<size> &vec, limb y) noexcept {
  return small_add_from(vec, y, 0);
 }

 // multiply bigint by scalar value.
-template <uint16_t size>
+template <limb_t size>
 inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,
                                            limb y) noexcept {
  limb carry = 0;
-  for (size_t index = 0; index < vec.len(); index++) {
+  for (limb_t index = 0; index != vec.len(); ++index) {
    vec[index] = scalar_mul(vec[index], y, carry);
  }
  if (carry != 0) {
@ -298,17 +295,18 @@ inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec<size> &vec,

 // add bigint to bigint starting from index.
 // used in grade school multiplication
-template <uint16_t size>
+template <limb_t size>
 FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,
-                                          size_t start) noexcept {
+                                          limb_t start) noexcept {
  // the effective x buffer is from `xstart..x.len()`, so exit early
  // if we can't get that current range.
-  if (x.len() < start || y.len() > x.len() - start) {
-    FASTFLOAT_TRY(x.try_resize(y.len() + start, 0));
+  if (x.len() < start ||
+      y.len() > static_cast<uint_fast16_t>(x.len() - start)) {
+    FASTFLOAT_TRY(x.try_resize(static_cast<limb_t>(y.len()) + start, 0));
  }

  bool carry = false;
-  for (size_t index = 0; index < y.len(); index++) {
+  for (limb_t index = 0; index != y.len(); ++index) {
    limb xi = x[index + start];
    limb yi = y[index];
    bool c1 = false;
@ -323,20 +321,20 @@ FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec<size> &x, limb_span y,

  // handle overflow
  if (carry) {
-    FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start));
+    FASTFLOAT_TRY(small_add_from(x, 1, static_cast<limb_t>(y.len()) + start));
  }
  return true;
 }

 // add bigint to bigint.
-template <uint16_t size>
+template <limb_t size>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
 large_add_from(stackvec<size> &x, limb_span y) noexcept {
  return large_add_from(x, y, 0);
 }

 // grade-school multiplication algorithm
-template <uint16_t size>
+template <limb_t size>
 FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
  limb_span xs = limb_span(x.data, x.len());
  stackvec<size> z(xs);
@ -345,7 +343,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
  if (y.len() != 0) {
    limb y0 = y[0];
    FASTFLOAT_TRY(small_mul(x, y0));
-    for (size_t index = 1; index < y.len(); index++) {
+    for (limb_t index = 1; index < y.len(); ++index) {
      limb yi = y[index];
      stackvec<size> zi;
      if (yi != 0) {
@ -364,7 +362,7 @@ FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec<size> &x, limb_span y) noexcept {
 }

 // grade-school multiplication algorithm
-template <uint16_t size>
+template <limb_t size>
 FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
  if (y.len() == 1) {
    FASTFLOAT_TRY(small_mul(x, y[0]));
@ -375,7 +373,7 @@ FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec<size> &x, limb_span y) noexcept {
 }

 template <typename = void> struct pow5_tables {
-  static constexpr uint32_t large_step = 135;
+  static constexpr limb_t large_step = 135;
  static constexpr uint64_t small_power_of_5[] = {
      1UL,
      5UL,
@ -419,7 +417,7 @@ template <typename = void> struct pow5_tables {

 #if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE

-template <typename T> constexpr uint32_t pow5_tables<T>::large_step;
+template <typename T> constexpr limb_t pow5_tables<T>::large_step;

 template <typename T> constexpr uint64_t pow5_tables<T>::small_power_of_5[];

@ -435,14 +433,14 @@ struct bigint : pow5_tables<> {
  // storage of the limbs, in little-endian order.
  stackvec<bigint_limbs> vec;

-  FASTFLOAT_CONSTEXPR20 bigint() : vec() {}
+  FASTFLOAT_CONSTEXPR20 bigint() noexcept : vec() {}

  bigint(bigint const &) = delete;
  bigint &operator=(bigint const &) = delete;
  bigint(bigint &&) = delete;
  bigint &operator=(bigint &&other) = delete;

-  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() {
+  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) noexcept : vec() {
 #ifdef FASTFLOAT_64BIT_LIMB
    vec.push_unchecked(value);
 #else
@ -493,7 +491,7 @@ struct bigint : pow5_tables<> {
    } else if (vec.len() < other.vec.len()) {
      return -1;
    } else {
-      for (size_t index = vec.len(); index > 0; index--) {
+      for (limb_t index = vec.len(); index > 0; --index) {
        limb xi = vec[index - 1];
        limb yi = other.vec[index - 1];
        if (xi > yi) {
@ -508,7 +506,7 @@ struct bigint : pow5_tables<> {

  // shift left each limb n bits, carrying over to the new limb
  // returns true if we were able to shift all the digits.
-  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl_bits(limb_t n) noexcept {
    // Internally, for each item, we shift left by n, and add the previous
    // right shifted limb-bits.
    // For example, we transform (for u8) shifted left 2, to:
@ -517,10 +515,10 @@ struct bigint : pow5_tables<> {
    FASTFLOAT_DEBUG_ASSERT(n != 0);
    FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8);

-    size_t shl = n;
-    size_t shr = limb_bits - shl;
+    limb_t const shl = n;
+    limb_t const shr = limb_bits - shl;
    limb prev = 0;
-    for (size_t index = 0; index < vec.len(); index++) {
+    for (limb_t index = 0; index != vec.len(); ++index) {
      limb xi = vec[index];
      vec[index] = (xi << shl) | (prev >> shr);
      prev = xi;
@ -534,9 +532,10 @@ struct bigint : pow5_tables<> {
  }

  // move the limbs left by `n` limbs.
-  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl_limbs(limb_t n) noexcept {
    FASTFLOAT_DEBUG_ASSERT(n != 0);
    if (n + vec.len() > vec.capacity()) {
+      // we can't shift more than the capacity of the vector.
      return false;
    } else if (!vec.is_empty()) {
      // move limbs
@ -555,9 +554,9 @@ struct bigint : pow5_tables<> {
  }

  // move the limbs left by `n` bits.
-  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
-    size_t rem = n % limb_bits;
-    size_t div = n / limb_bits;
+  FASTFLOAT_CONSTEXPR20 bool shl(bigint_bits_t n) noexcept {
+    auto const rem = static_cast<limb_t>(n % limb_bits);
+    auto const div = static_cast<limb_t>(n / limb_bits);
    if (rem != 0) {
      FASTFLOAT_TRY(shl_bits(rem));
    }
@ -568,8 +567,9 @@ struct bigint : pow5_tables<> {
  }

  // get the number of leading zeros in the bigint.
-  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
+  FASTFLOAT_CONSTEXPR20 bigint_bits_t ctlz() const noexcept {
    if (vec.is_empty()) {
+      // empty vector, no bits, no zeros.
      return 0;
    } else {
 #ifdef FASTFLOAT_64BIT_LIMB
@ -583,9 +583,9 @@ struct bigint : pow5_tables<> {
  }

  // get the number of bits in the bigint.
-  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
-    int lz = ctlz();
-    return int(limb_bits * vec.len()) - lz;
+  FASTFLOAT_CONSTEXPR20 bigint_bits_t bit_length() const noexcept {
+    auto lz = ctlz();
+    return limb_bits * vec.len() - lz;
  }

  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); }
@ -593,23 +593,27 @@ struct bigint : pow5_tables<> {
  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); }

  // multiply as if by 2 raised to a power.
-  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); }
+  FASTFLOAT_CONSTEXPR20 bool pow2(am_pow_t const exp) noexcept {
+    FASTFLOAT_ASSERT(exp >= 0);
+    return shl(static_cast<fast_float::bigint_bits_t>(exp));
+  }

  // multiply as if by 5 raised to a power.
-  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow5(am_pow_t exp) noexcept {
+    FASTFLOAT_ASSERT(exp >= 0);
    // multiply by a power of 5
-    size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
-    limb_span large = limb_span(large_power_of_5, large_length);
+    limb_t const large_length = sizeof(large_power_of_5) / sizeof(limb);
+    limb_span const large = limb_span(large_power_of_5, large_length);
    while (exp >= large_step) {
      FASTFLOAT_TRY(large_mul(vec, large));
      exp -= large_step;
    }
 #ifdef FASTFLOAT_64BIT_LIMB
-    uint32_t small_step = 27;
-    limb max_native = 7450580596923828125UL;
+    limb_t constexpr small_step = 27;
+    limb constexpr max_native = 7450580596923828125UL;
 #else
-    uint32_t small_step = 13;
-    limb max_native = 1220703125U;
+    limb_t constexpr small_step = 13;
+    limb constexpr max_native = 1220703125U;
 #endif
    while (exp >= small_step) {
      FASTFLOAT_TRY(small_mul(vec, max_native));
@ -627,7 +631,8 @@ struct bigint : pow5_tables<> {
  }

  // multiply as if by 10 raised to a power.
-  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow10(am_pow_t exp) noexcept {
+    FASTFLOAT_ASSERT(exp >= 0);
    FASTFLOAT_TRY(pow5(exp));
    return pow2(exp);
  }
--- a/include/fast_float/constexpr_feature_detect.h
+++ b/include/fast_float/constexpr_feature_detect.h
@ -7,13 +7,30 @@
 #endif
 #endif

-// Testing for https://wg21.link/N3652, adopted in C++14
-#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304
+// C++14 constexpr
+#if defined(__cpp_constexpr) && __cpp_constexpr >= 201304L
+#define FASTFLOAT_CONSTEXPR14 constexpr
+#elif __cplusplus >= 201402L
+#define FASTFLOAT_CONSTEXPR14 constexpr
+#elif defined(_MSC_VER) && _MSC_VER >= 1910 && _MSVC_LANG >= 201402L
 #define FASTFLOAT_CONSTEXPR14 constexpr
 #else
 #define FASTFLOAT_CONSTEXPR14
 #endif

+// C++14 variable templates
+#if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304L
+#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 1
+#elif __cplusplus >= 201402L
+#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 1
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918L &&                 \
+    _MSVC_LANG >= 201402L
+#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 1
+#else
+#define FASTFLOAT_HAS_VARIABLE_TEMPLATES 0
+#endif
+
+// C++20 std::bit_cast
 #if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
 #define FASTFLOAT_HAS_BIT_CAST 1
 #else
@ -23,16 +40,47 @@
 #if defined(__cpp_lib_is_constant_evaluated) &&                                \
     __cpp_lib_is_constant_evaluated >= 201811L
 #define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1
 #else
 #define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0
 #endif

+// C++20 consteval (language feature; not implied by is_constant_evaluated)
+#if defined(__cpp_consteval) && __cpp_consteval >= 201811L
+#define FASTFLOAT_CONSTEVAL consteval
+#else
+#define FASTFLOAT_CONSTEVAL FASTFLOAT_CONSTEXPR14
+#endif
+
+#if defined(__cpp_lib_byteswap)
+#define FASTFLOAT_HAS_BYTESWAP 1
+#else
+#define FASTFLOAT_HAS_BYTESWAP 0
+#endif
+
+// C++17 if constexpr
 #if defined(__cpp_if_constexpr) && __cpp_if_constexpr >= 201606L
 #define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
+#elif defined(__cpp_constexpr) && __cpp_constexpr >= 201603L
+#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
+#elif __cplusplus >= 201703L
+#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
+#elif defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L
+#define FASTFLOAT_IF_CONSTEXPR17(x) if constexpr (x)
 #else
 #define FASTFLOAT_IF_CONSTEXPR17(x) if (x)
 #endif

+// C++17 inline variables
+#if defined(__cpp_inline_variables) && __cpp_inline_variables >= 201606L
+#define FASTFLOAT_INLINE_VARIABLE inline constexpr
+#elif __cplusplus >= 201703L
+#define FASTFLOAT_INLINE_VARIABLE inline constexpr
+#elif defined(_MSC_VER) && _MSC_VER >= 1912 && _MSVC_LANG >= 201703L
+#define FASTFLOAT_INLINE_VARIABLE inline constexpr
+#else
+#define FASTFLOAT_INLINE_VARIABLE static constexpr
+#endif
+
 // Testing for relevant C++20 constexpr library features
 #if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST &&           \
     defined(__cpp_lib_constexpr_algorithms) &&                                 \
@ -50,4 +98,22 @@
 #define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1
 #endif

+#if defined(__has_builtin)
+#define FASTFLOAT_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#define FASTFLOAT_HAS_BUILTIN(x) false
+#endif
+
+// C++23 [[assume]] attribute (P1774). Attributes have no __cpp_* macro, so
+// probe with __has_cpp_attribute; only enable it in post-C++20 modes to avoid
+// "attribute is an extension" diagnostics in older modes.
+#if defined(__has_cpp_attribute) && __cplusplus > 202002L
+#if __has_cpp_attribute(assume) >= 202207L
+#define FASTFLOAT_ASSUME(expr) [[assume(expr)]]
+#endif
+#endif
+#ifndef FASTFLOAT_ASSUME
+#define FASTFLOAT_ASSUME(expr)
+#endif
+
 #endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
--- a/include/fast_float/decimal_to_binary.h
+++ b/include/fast_float/decimal_to_binary.h
@ -17,30 +17,32 @@ namespace fast_float {
 // most significant bits and the low part corresponding to the least significant
 // bits.
 //
-template <int bit_precision>
+template <am_bits_t bit_precision>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128
-compute_product_approximation(int64_t q, uint64_t w) {
-  int const index = 2 * int(q - powers::smallest_power_of_five);
+compute_product_approximation(am_pow_t q, am_mant_t w) noexcept {
+  am_pow_t const index = 2 * (q - powers::smallest_power_of_five);
  // For small values of q, e.g., q in [0,27], the answer is always exact
  // because The line value128 firstproduct = full_multiplication(w,
  // power_of_five_128[index]); gives the exact answer.
  value128 firstproduct =
      full_multiplication(w, powers::power_of_five_128[index]);
  static_assert((bit_precision >= 0) && (bit_precision <= 64),
-                " precision should  be in (0,64]");
+                " precision should be in [0,64]");
  constexpr uint64_t precision_mask =
      (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
                           : uint64_t(0xFFFFFFFFFFFFFFFF);
+
  if ((firstproduct.high & precision_mask) ==
      precision_mask) { // could further guard with  (lower + w < lower)
    // regarding the second product, we only need secondproduct.high, but our
    // expectation is that the compiler will optimize this extra work away if
    // needed.
-    value128 secondproduct =
+    value128 const secondproduct =
        full_multiplication(w, powers::power_of_five_128[index + 1]);
    firstproduct.low += secondproduct.high;
+
    if (secondproduct.high > firstproduct.low) {
-      firstproduct.high++;
+      ++firstproduct.high;
    }
  }
  return firstproduct;
@ -62,7 +64,7 @@ namespace detail {
 * where
 *   p = log(5**-q)/log(2) = -q * log(5)/log(2)
 */
-constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
+constexpr fastfloat_really_inline am_pow_t power(am_pow_t const q) noexcept {
  return (((152170 + 65536) * q) >> 16) + 63;
 }
 } // namespace detail
@ -71,13 +73,13 @@ constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept {
 // for significant digits already multiplied by 10 ** q.
 template <typename binary>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa
-compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
-  int hilz = int(w >> 63) ^ 1;
+compute_error_scaled(am_pow_t q, am_mant_t w, limb_t lz) noexcept {
+  auto const hilz = static_cast<am_bits_t>((w >> 63) ^ 1);
  adjusted_mantissa answer;
  answer.mantissa = w << hilz;
-  int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent();
-  answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 +
-                          invalid_am_bias);
+  constexpr am_pow_t bias =
+      binary::mantissa_explicit_bits() - binary::minimum_exponent();
+  answer.power2 = detail::power(q) + bias - hilz - lz - 62 + invalid_am_bias;
  return answer;
 }

@ -85,10 +87,10 @@ compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept {
 // the power2 in the exponent will be adjusted by invalid_am_bias.
 template <typename binary>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-compute_error(int64_t q, uint64_t w) noexcept {
-  int lz = leading_zeroes(w);
+compute_error(am_pow_t q, am_mant_t w) noexcept {
+  auto const lz = leading_zeroes(w);
  w <<= lz;
-  value128 product =
+  value128 const product =
      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
  return compute_error_scaled<binary>(q, product.high, lz);
 }
@ -100,12 +102,12 @@ compute_error(int64_t q, uint64_t w) noexcept {
 // should recompute in such cases.
 template <typename binary>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-compute_float(int64_t q, uint64_t w) noexcept {
+compute_float(am_pow_t q, am_mant_t w) noexcept {
  adjusted_mantissa answer;
  if ((w == 0) || (q < binary::smallest_power_of_ten())) {
+    // we want to get zero:
    answer.power2 = 0;
    answer.mantissa = 0;
-    // result should be zero
    return answer;
  }
  if (q > binary::largest_power_of_ten()) {
@ -114,11 +116,12 @@ compute_float(int64_t q, uint64_t w) noexcept {
    answer.mantissa = 0;
    return answer;
  }
+
  // At this point in time q is in [powers::smallest_power_of_five,
  // powers::largest_power_of_five].

  // We want the most significant bit of i to be 1. Shift if needed.
-  int lz = leading_zeroes(w);
+  auto const lz = leading_zeroes(w);
  w <<= lz;

  // The required precision is binary::mantissa_explicit_bits() + 3 because
@ -127,7 +130,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
  // 3. We might lose a bit due to the "upperbit" routine (result too small,
  // requiring a shift)

-  value128 product =
+  value128 const product =
      compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
  // The computed 'product' is always sufficient.
  // Mathematical proof:
@ -138,14 +141,18 @@ compute_float(int64_t q, uint64_t w) noexcept {
  // branchless approach: value128 product = compute_product(q, w); but in
  // practice, we can win big with the compute_product_approximation if its
  // additional branch is easily predicted. Which is best is data specific.
-  int upperbit = int(product.high >> 63);
-  int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3;
+  auto const upperbit = static_cast<am_bits_t>(product.high >> 63);
+  auto const shift = static_cast<am_bits_t>(
+      upperbit + 64 - binary::mantissa_explicit_bits() - 3);

+  // Shift right the mantissa to the correct position
  answer.mantissa = product.high >> shift;

-  answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz -
-                          binary::minimum_exponent());
-  if (answer.power2 <= 0) { // we have a subnormal?
+  answer.power2 = detail::power(q) + upperbit - lz - binary::minimum_exponent();
+
+  // Now, we need to round the mantissa correctly.
+
+  if (answer.power2 <= 0) { // we have a subnormal or very small value.
    // Here have that answer.power2 <= 0 so -answer.power2 >= 0
    if (-answer.power2 + 1 >=
        64) { // if we have more than 64 bits below the minimum exponent, you
@ -155,6 +162,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
      // result should be zero
      return answer;
    }
+    // We have a subnormal number. We need to shift the mantissa to the right
    // next line is safe because -answer.power2 + 1 < 64
    answer.mantissa >>= -answer.power2 + 1;
    // Thankfully, we can't have both "round-to-even" and subnormals because
@ -170,7 +178,7 @@ compute_float(int64_t q, uint64_t w) noexcept {
    // subnormal, but we can only know this after rounding.
    // So we only declare a subnormal if we are smaller than the threshold.
    answer.power2 =
-        (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits()))
+        (answer.mantissa < (am_mant_t(1) << binary::mantissa_explicit_bits()))
            ? 0
            : 1;
    return answer;
@ -188,22 +196,25 @@ compute_float(int64_t q, uint64_t w) noexcept {
    // ... we dropped out only zeroes. But if this happened, then we can go
    // back!!!
    if ((answer.mantissa << shift) == product.high) {
-      answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up
+      answer.mantissa &= ~am_mant_t(1); // flip it so that we do not round up
    }
  }

+  // Normal rounding
  answer.mantissa += (answer.mantissa & 1); // round up
  answer.mantissa >>= 1;
-  if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) {
-    answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits());
-    answer.power2++; // undo previous addition
+  if (answer.mantissa >= (am_mant_t(2) << binary::mantissa_explicit_bits())) {
+    answer.mantissa = (am_mant_t(1) << binary::mantissa_explicit_bits());
+    ++answer.power2; // undo previous line addition
  }

-  answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits());
+  // Check if we have infinity after computation
+  answer.mantissa &= ~(am_mant_t(1) << binary::mantissa_explicit_bits());
  if (answer.power2 >= binary::infinite_power()) { // infinity
    answer.power2 = binary::infinite_power();
    answer.mantissa = 0;
  }
+
  return answer;
 }

--- a/include/fast_float/digit_comparison.h
+++ b/include/fast_float/digit_comparison.h
@ -38,8 +38,8 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
 // this algorithm is not even close to optimized, but it has no practical
 // effect on performance: in order to have a faster algorithm, we'd need
 // to slow down performance for faster algorithms, and this is still fast.
-fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t
-scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14 am_pow_t
+scientific_exponent(am_mant_t mantissa, am_pow_t exponent) noexcept {
  while (mantissa >= 10000) {
    mantissa /= 10000;
    exponent += 4;
@ -58,29 +58,26 @@ scientific_exponent(uint64_t mantissa, int32_t exponent) noexcept {
 // this converts a native floating-point number to an extended-precision float.
 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-to_extended(T value) noexcept {
+to_extended(T const value) noexcept {
  using equiv_uint = equiv_uint_t<T>;
  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();

  adjusted_mantissa am;
-  int32_t bias = binary_format<T>::mantissa_explicit_bits() -
-                 binary_format<T>::minimum_exponent();
-  equiv_uint bits;
-#if FASTFLOAT_HAS_BIT_CAST
-  bits = std::bit_cast<equiv_uint>(value);
-#else
-  ::memcpy(&bits, &value, sizeof(T));
-#endif
+  constexpr am_pow_t bias = binary_format<T>::mantissa_explicit_bits() -
+                            binary_format<T>::minimum_exponent();
+
+  auto const bits = bit_cast<equiv_uint>(value);
+
  if ((bits & exponent_mask) == 0) {
    // denormal
    am.power2 = 1 - bias;
    am.mantissa = bits & mantissa_mask;
  } else {
    // normal
-    am.power2 = int32_t((bits & exponent_mask) >>
-                        binary_format<T>::mantissa_explicit_bits());
+    am.power2 = static_cast<am_pow_t>(
+        (bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
    am.power2 -= bias;
    am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
  }
@ -93,7 +90,7 @@ to_extended(T value) noexcept {
 // halfway between b and b+u.
 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-to_extended_halfway(T value) noexcept {
+to_extended_halfway(T const value) noexcept {
  adjusted_mantissa am = to_extended(value);
  am.mantissa <<= 1;
  am.mantissa += 1;
@ -105,14 +102,15 @@ to_extended_halfway(T value) noexcept {
 template <typename T, typename callback>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,
                                                         callback cb) noexcept {
-  int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
+  constexpr am_pow_t mantissa_shift =
+      64 - binary_format<T>::mantissa_explicit_bits() - 1;
  if (-am.power2 >= mantissa_shift) {
    // have a denormal float
-    int32_t shift = -am.power2 + 1;
-    cb(am, std::min<int32_t>(shift, 64));
+    am_pow_t shift = -am.power2 + 1;
+    cb(am, std::min<am_pow_t>(shift, 64));
    // check for round-up: if rounding-nearest carried us to the hidden bit.
    am.power2 = (am.mantissa <
-                 (uint64_t(1) << binary_format<T>::mantissa_explicit_bits()))
+                 (am_mant_t(1) << binary_format<T>::mantissa_explicit_bits()))
                    ? 0
                    : 1;
    return;
@ -123,13 +121,13 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,

  // check for carry
  if (am.mantissa >=
-      (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
-    am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
-    am.power2++;
+      (am_mant_t(2) << binary_format<T>::mantissa_explicit_bits())) {
+    am.mantissa = (am_mant_t(1) << binary_format<T>::mantissa_explicit_bits());
+    ++am.power2;
  }

  // check for infinite: we could have carried to an infinite power
-  am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
+  am.mantissa &= ~(am_mant_t(1) << binary_format<T>::mantissa_explicit_bits());
  if (am.power2 >= binary_format<T>::infinite_power()) {
    am.power2 = binary_format<T>::infinite_power();
    am.mantissa = 0;
@ -138,11 +136,12 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am,

 template <typename callback>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
-round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
+round_nearest_tie_even(adjusted_mantissa &am, am_pow_t shift,
                       callback cb) noexcept {
-  uint64_t const mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1;
-  uint64_t const halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1);
-  uint64_t truncated_bits = am.mantissa & mask;
+  am_mant_t const mask = (shift == 64) ? std::numeric_limits<am_mant_t>::max()
+                                       : (am_mant_t(1) << shift) - 1;
+  am_mant_t const halfway = (shift == 0) ? 0 : am_mant_t(1) << (shift - 1);
+  am_mant_t truncated_bits = am.mantissa & mask;
  bool is_above = truncated_bits > halfway;
  bool is_halfway = truncated_bits == halfway;

@ -155,11 +154,11 @@ round_nearest_tie_even(adjusted_mantissa &am, int32_t shift,
  am.power2 += shift;

  bool is_odd = (am.mantissa & 1) == 1;
-  am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
+  am.mantissa += am_mant_t(cb(is_odd, is_halfway, is_above));
 }

 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
-round_down(adjusted_mantissa &am, int32_t shift) noexcept {
+round_down(adjusted_mantissa &am, am_pow_t shift) noexcept {
  if (shift == 64) {
    am.mantissa = 0;
  } else {
@ -171,9 +170,9 @@ round_down(adjusted_mantissa &am, int32_t shift) noexcept {
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
 skip_zeros(UC const *&first, UC const *last) noexcept {
-  uint64_t val;
  while (!cpp20_and_in_constexpr() &&
         std::distance(first, last) >= int_cmp_len<UC>()) {
+    uint64_t val;
    ::memcpy(&val, first, sizeof(uint64_t));
    if (val != int_cmp_zeros<UC>()) {
      break;
@ -184,7 +183,7 @@ skip_zeros(UC const *&first, UC const *last) noexcept {
    if (*first != UC('0')) {
      break;
    }
-    first++;
+    ++first;
  }
 }

@ -194,9 +193,9 @@ template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
 is_truncated(UC const *first, UC const *last) noexcept {
  // do 8-bit optimizations, can just compare to 8 literal 0s.
-  uint64_t val;
  while (!cpp20_and_in_constexpr() &&
         std::distance(first, last) >= int_cmp_len<UC>()) {
+    uint64_t val;
    ::memcpy(&val, first, sizeof(uint64_t));
    if (val != int_cmp_zeros<UC>()) {
      return true;
@ -220,8 +219,8 @@ is_truncated(span<UC const> s) noexcept {

 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
-parse_eight_digits(UC const *&p, limb &value, size_t &counter,
-                   size_t &count) noexcept {
+parse_eight_digits(UC const *&p, limb &value, am_digits &counter,
+                   am_digits &count) noexcept {
  value = value * 100000000 + parse_eight_digits_unrolled(p);
  p += 8;
  counter += 8;
@ -230,12 +229,12 @@ parse_eight_digits(UC const *&p, limb &value, size_t &counter,

 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void
-parse_one_digit(UC const *&p, limb &value, size_t &counter,
-                size_t &count) noexcept {
+parse_one_digit(UC const *&p, limb &value, am_digits &counter,
+                am_digits &count) noexcept {
  value = value * 10 + limb(*p - UC('0'));
-  p++;
-  counter++;
-  count++;
+  ++p;
+  ++counter;
+  ++count;
 }

 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
@ -245,28 +244,28 @@ add_native(bigint &big, limb power, limb value) noexcept {
 }

 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void
-round_up_bigint(bigint &big, size_t &count) noexcept {
+round_up_bigint(bigint &big, am_digits &count) noexcept {
  // need to round-up the digits, but need to avoid rounding
  // ....9999 to ...10000, which could cause a false halfway point.
  add_native(big, 10, 1);
-  count++;
+  ++count;
 }

 // parse the significant digits into a big integer
-template <typename UC>
-inline FASTFLOAT_CONSTEXPR20 void
-parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
-               size_t max_digits, size_t &digits) noexcept {
+template <typename T, typename UC>
+inline FASTFLOAT_CONSTEXPR20 am_digits
+parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num) noexcept {
  // try to minimize the number of big integer and scalar multiplication.
  // therefore, try to parse 8 digits at a time, and multiply by the largest
  // scalar value (9 or 19 digits) for each step.
-  size_t counter = 0;
-  digits = 0;
+  constexpr am_digits max_digits = binary_format<T>::max_digits();
+  am_digits counter = 0;
+  am_digits digits = 0;
  limb value = 0;
 #ifdef FASTFLOAT_64BIT_LIMB
-  size_t step = 19;
+  constexpr am_digits step = 19;
 #else
-  size_t step = 9;
+  constexpr am_digits step = 9;
 #endif

  // process all integer digits.
@ -292,7 +291,7 @@ parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
      if (truncated) {
        round_up_bigint(result, digits);
      }
-      return;
+      return digits;
    } else {
      add_native(result, limb(powers_of_ten_uint64[counter]), value);
      counter = 0;
@ -323,7 +322,7 @@ parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
        if (truncated) {
          round_up_bigint(result, digits);
        }
-        return;
+        return digits;
      } else {
        add_native(result, limb(powers_of_ten_uint64[counter]), value);
        counter = 0;
@ -335,20 +334,21 @@ parse_mantissa(bigint &result, parsed_number_string_t<UC> &num,
  if (counter != 0) {
    add_native(result, limb(powers_of_ten_uint64[counter]), value);
  }
+  return digits;
 }

 template <typename T>
-inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
-  FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
-  adjusted_mantissa answer;
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa positive_digit_comp(
+    bigint &bigmant, adjusted_mantissa am, am_pow_t const exponent) noexcept {
+  FASTFLOAT_ASSERT(bigmant.pow10(exponent));
  bool truncated;
-  answer.mantissa = bigmant.hi64(truncated);
-  int bias = binary_format<T>::mantissa_explicit_bits() -
-             binary_format<T>::minimum_exponent();
-  answer.power2 = bigmant.bit_length() - 64 + bias;
+  am.mantissa = bigmant.hi64(truncated);
+  constexpr am_pow_t bias = binary_format<T>::mantissa_explicit_bits() -
+                            binary_format<T>::minimum_exponent();
+  am.power2 =
+      static_cast<fast_float::am_pow_t>(bigmant.bit_length() - 64 + bias);

-  round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
+  round<T>(am, [truncated](adjusted_mantissa &a, am_pow_t shift) {
    round_nearest_tie_even(
        a, shift,
        [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
@ -357,7 +357,7 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
        });
  });

-  return answer;
+  return am;
 }

 // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
@ -366,39 +366,40 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
 // we then need to scale by `2^(f- e)`, and then the two significant digits
 // are of the same magnitude.
 template <typename T>
-inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
-    bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
-  bigint &real_digits = bigmant;
-  int32_t real_exp = exponent;
-
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
+negative_digit_comp(bigint &real_digits, adjusted_mantissa am,
+                    am_pow_t const real_exp) noexcept {
  // get the value of `b`, rounded down, and get a bigint representation of b+h
  adjusted_mantissa am_b = am;
-  // gcc7 buf: use a lambda to remove the noexcept qualifier bug with
+  // gcc7 bug: use a lambda to remove the noexcept qualifier bug with
  // -Wnoexcept-type.
  round<T>(am_b,
-           [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
+           [](adjusted_mantissa &a, am_pow_t shift) { round_down(a, shift); });
  T b;
-  to_float(false, am_b, b);
-  adjusted_mantissa theor = to_extended_halfway(b);
+  to_float(
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+      false,
+#endif
+      am_b, b);
+  adjusted_mantissa const theor = to_extended_halfway(b);
  bigint theor_digits(theor.mantissa);
-  int32_t theor_exp = theor.power2;
+  am_pow_t const theor_exp = theor.power2;

  // scale real digits and theor digits to be same power.
-  int32_t pow2_exp = theor_exp - real_exp;
-  uint32_t pow5_exp = uint32_t(-real_exp);
+  auto const pow2_exp = theor_exp - real_exp;
+  auto const pow5_exp = -real_exp;
  if (pow5_exp != 0) {
    FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
  }
  if (pow2_exp > 0) {
-    FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
+    FASTFLOAT_ASSERT(theor_digits.pow2(pow2_exp));
  } else if (pow2_exp < 0) {
-    FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
+    FASTFLOAT_ASSERT(real_digits.pow2(-pow2_exp));
  }

  // compare digits, and use it to direct rounding
-  int ord = real_digits.compare(theor_digits);
-  adjusted_mantissa answer = am;
-  round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
+  auto const ord = real_digits.compare(theor_digits);
+  round<T>(am, [ord](adjusted_mantissa &a, am_pow_t shift) {
    round_nearest_tie_even(
        a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
          (void)_;  // not needed, since we've done our comparison
@ -413,7 +414,7 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
        });
  });

-  return answer;
+  return am;
 }

 // parse the significant digits as a big integer to unambiguously round
@ -430,21 +431,18 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp(
 // the actual digits. we then compare the big integer representations
 // of both, and use that to direct rounding.
 template <typename T, typename UC>
-inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-digit_comp(parsed_number_string_t<UC> &num, adjusted_mantissa am) noexcept {
+inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
+    parsed_number_string_t<UC> const &num, adjusted_mantissa am) noexcept {
  // remove the invalid exponent bias
  am.power2 -= invalid_am_bias;

-  int32_t sci_exp =
-      scientific_exponent(num.mantissa, static_cast<int32_t>(num.exponent));
-  size_t max_digits = binary_format<T>::max_digits();
-  size_t digits = 0;
+  am_pow_t const sci_exp = scientific_exponent(num.mantissa, num.exponent);
  bigint bigmant;
-  parse_mantissa(bigmant, num, max_digits, digits);
+  am_digits const digits = parse_mantissa<T, UC>(bigmant, num);
  // can't underflow, since digits is at most max_digits.
-  int32_t exponent = sci_exp + 1 - int32_t(digits);
+  am_pow_t const exponent = sci_exp + 1 - static_cast<am_pow_t>(digits);
  if (exponent >= 0) {
-    return positive_digit_comp<T>(bigmant, exponent);
+    return positive_digit_comp<T>(bigmant, am, exponent);
  } else {
    return negative_digit_comp<T>(bigmant, am, exponent);
  }
--- a/include/fast_float/fast_float.h
+++ b/include/fast_float/fast_float.h
@ -34,7 +34,7 @@ template <typename T, typename UC = char,
          typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>::value)>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars(UC const *first, UC const *last, T &value,
-           chars_format fmt = chars_format::general) noexcept;
+           chars_format const fmt = chars_format::general) noexcept;

 /**
 * Like from_chars, but accepts an `options` argument to govern number parsing.
@ -43,7 +43,7 @@ from_chars(UC const *first, UC const *last, T &value,
 template <typename T, typename UC = char>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
-                    parse_options_t<UC> options) noexcept;
+                    parse_options_t<UC> const options) noexcept;

 /**
 * This function multiplies an integer number by a power of 10 and returns
@ -59,9 +59,11 @@ from_chars_advanced(UC const *first, UC const *last, T &value,
 * `new` or `malloc`).
 */
 FASTFLOAT_CONSTEXPR20 inline double
-integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
+integer_times_pow10(uint64_t const mantissa,
+                    int const decimal_exponent) noexcept;
 FASTFLOAT_CONSTEXPR20 inline double
-integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
+integer_times_pow10(int64_t const mantissa,
+                    int const decimal_exponent) noexcept;

 /**
 * This function is a template overload of `integer_times_pow10()`
@ -71,11 +73,13 @@ integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
 template <typename T>
 FASTFLOAT_CONSTEXPR20
    typename std::enable_if<is_supported_float_type<T>::value, T>::type
-    integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept;
+    integer_times_pow10(uint64_t const mantissa,
+                        int const decimal_exponent) noexcept;
 template <typename T>
 FASTFLOAT_CONSTEXPR20
    typename std::enable_if<is_supported_float_type<T>::value, T>::type
-    integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept;
+    integer_times_pow10(int64_t const mantissa,
+                        int const decimal_exponent) noexcept;

 /**
 * from_chars for integer types.
@ -83,7 +87,8 @@ FASTFLOAT_CONSTEXPR20
 template <typename T, typename UC = char,
          typename = FASTFLOAT_ENABLE_IF(is_supported_integer_type<T>::value)>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
-from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept;
+from_chars(UC const *first, UC const *last, T &value,
+           int const base = 10) noexcept;

 } // namespace fast_float

--- a/include/fast_float/fast_table.h
+++ b/include/fast_float/fast_table.h
@ -1,8 +1,6 @@
 #ifndef FASTFLOAT_FAST_TABLE_H
 #define FASTFLOAT_FAST_TABLE_H

-#include <cstdint>
-
 namespace fast_float {

 /**
@ -30,15 +28,14 @@ namespace fast_float {
 * of 5 greater than 308.
 */
 template <class unused = void> struct powers_template {
-
-  constexpr static int smallest_power_of_five =
+  constexpr static am_pow_t smallest_power_of_five =
      binary_format<double>::smallest_power_of_ten();
-  constexpr static int largest_power_of_five =
+  constexpr static am_pow_t largest_power_of_five =
      binary_format<double>::largest_power_of_ten();
-  constexpr static int number_of_entries =
+  constexpr static am_pow_t number_of_entries =
      2 * (largest_power_of_five - smallest_power_of_five + 1);
  // Powers of five from 5^-342 all the way to 5^308 rounded toward one.
-  constexpr static uint64_t power_of_five_128[number_of_entries] = {
+  constexpr static am_mant_t power_of_five_128[number_of_entries] = {
      0xeef453d6923bd65a, 0x113faa2906a13b3f,
      0x9558b4661b6565f8, 0x4ac7ca59a424c507,
      0xbaaee17fa23ebf76, 0x5d79bcf00d2df649,
@ -696,7 +693,7 @@ template <class unused = void> struct powers_template {
 #if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE

 template <class unused>
-constexpr uint64_t
+constexpr am_mant_t
    powers_template<unused>::power_of_five_128[number_of_entries];

 #endif
--- a/include/fast_float/float_common.h
+++ b/include/fast_float/float_common.h
--- a/include/fast_float/parse_number.h
+++ b/include/fast_float/parse_number.h
@ -14,26 +14,30 @@
 namespace fast_float {

 namespace detail {
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
 /**
- * Special case +inf, -inf, nan, infinity, -infinity.
+ * Special case inf, +inf, -inf, nan, infinity, -infinity.
 * The case comparisons could be made much faster given that we know that the
 * strings are null-free and fixed.
 **/
 template <typename T, typename UC>
 from_chars_result_t<UC>
    FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, UC const *last,
-                                       T &value, chars_format fmt) noexcept {
+                                       T &value,
+                                       const chars_format fmt) noexcept {
  from_chars_result_t<UC> answer{};
  answer.ptr = first;
  answer.ec = std::errc(); // be optimistic
-  // assume first < last, so dereference without checks;
+
+  FASTFLOAT_ASSUME(first < last); // so dereference without checks
+
  bool const minusSign = (*first == UC('-'));
  // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
-  if ((*first == UC('-')) ||
-      (uint64_t(fmt & chars_format::allow_leading_plus) &&
-       (*first == UC('+')))) {
+  if (minusSign || ((chars_format_t(fmt & chars_format::allow_leading_plus)) &&
+                    (*first == UC('+')))) {
    ++first;
  }
+
  if (last - first >= 3) {
    if (fastfloat_strncasecmp3(first, str_const_nan<UC>())) {
      answer.ptr = (first += 3);
@ -42,7 +46,7 @@ from_chars_result_t<UC>
      // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7,
      // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
      if (first != last && *first == UC('(')) {
-        for (UC const *ptr = first + 1; ptr != last; ++ptr) {
+        for (auto const *ptr = first + 1; ptr != last; ++ptr) {
          if (*ptr == UC(')')) {
            answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
            break;
@ -69,7 +73,9 @@ from_chars_result_t<UC>
  answer.ec = std::errc::invalid_argument;
  return answer;
 }
+#endif

+#ifndef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
 /**
 * Returns true if the floating-point rounding mode is to 'nearest'.
 * It is the default on most systems. This function is meant to be inexpensive.
@ -134,6 +140,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept {
 #pragma GCC diagnostic pop
 #endif
 }
+#endif

 } // namespace detail

@ -141,7 +148,7 @@ template <typename T> struct from_chars_caller {
  template <typename UC>
  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
  call(UC const *first, UC const *last, T &value,
-       parse_options_t<UC> options) noexcept {
+       parse_options_t<UC> const options) noexcept {
    return from_chars_advanced(first, last, value, options);
  }
 };
@ -151,7 +158,7 @@ template <> struct from_chars_caller<std::float32_t> {
  template <typename UC>
  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
  call(UC const *first, UC const *last, std::float32_t &value,
-       parse_options_t<UC> options) noexcept {
+       parse_options_t<UC> const options) noexcept {
    // if std::float32_t is defined, and we are in C++23 mode; macro set for
    // float32; set value to float due to equivalence between float and
    // float32_t
@ -168,7 +175,7 @@ template <> struct from_chars_caller<std::float64_t> {
  template <typename UC>
  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
  call(UC const *first, UC const *last, std::float64_t &value,
-       parse_options_t<UC> options) noexcept {
+       parse_options_t<UC> const options) noexcept {
    // if std::float64_t is defined, and we are in C++23 mode; macro set for
    // float64; set value as double due to equivalence between double and
    // float64_t
@ -183,14 +190,17 @@ template <> struct from_chars_caller<std::float64_t> {
 template <typename T, typename UC, typename>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars(UC const *first, UC const *last, T &value,
-           chars_format fmt /*= chars_format::general*/) noexcept {
+           chars_format const fmt /*= chars_format::general*/) noexcept {
  return from_chars_caller<T>::call(first, last, value,
                                    parse_options_t<UC>(fmt));
 }

 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool
-clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
+clinger_fast_path_impl(am_mant_t const mantissa, am_pow_t const exponent,
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+                       bool const is_negative,
+#endif
                       T &value) noexcept {
  // The implementation of the Clinger's fast path is convoluted because
  // we want round-to-nearest in all cases, irrespective of the rounding mode
@ -206,7 +216,9 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
    // We could check it first (before the previous branch), but
    // there might be performance advantages at having the check
    // be last.
+#ifndef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
    if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) {
+#endif
      // We have that fegetround() == FE_TONEAREST.
      // Next is Clinger's fast path.
      if (mantissa <= binary_format<T>::max_mantissa_fast_path()) {
@ -216,11 +228,14 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
        } else {
          value = value * binary_format<T>::exact_power_of_ten(exponent);
        }
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
        if (is_negative) {
          value = -value;
        }
+#endif
        return true;
      }
+#ifndef FASTFLOAT_ONLY_ROUNDS_TO_NEAREST_SUPPORTED
    } else {
      // We do not have that fegetround() == FE_TONEAREST.
      // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's
@ -230,17 +245,24 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
 #if defined(__clang__) || defined(FASTFLOAT_32BIT)
        // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
        if (mantissa == 0) {
-          value = is_negative ? T(-0.) : T(0.);
+          value =
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+              is_negative ? T(-0.) :
+#endif
+                          T(0.);
          return true;
        }
 #endif
        value = T(mantissa) * binary_format<T>::exact_power_of_ten(exponent);
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
        if (is_negative) {
          value = -value;
        }
+#endif
        return true;
      }
    }
+#endif
  }
  return false;
 }
@ -252,7 +274,7 @@ clinger_fast_path_impl(uint64_t mantissa, int64_t exponent, bool is_negative,
 */
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
-from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
+from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
  static_assert(is_supported_float_type<T>::value,
                "only some floating-point types are supported");
  static_assert(is_supported_char_type<UC>::value,
@ -263,8 +285,11 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
  answer.ec = std::errc(); // be optimistic
  answer.ptr = pns.lastmatch;

-  if (!pns.too_many_digits &&
-      clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value))
+  if (!pns.too_many_digits && clinger_fast_path_impl(pns.mantissa, pns.exponent,
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+                                                     pns.negative,
+#endif
+                                                     value))
    return answer;

  adjusted_mantissa am =
@ -280,7 +305,11 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
  if (am.power2 < 0) {
    am = digit_comp<T>(pns, am);
  }
-  to_float(pns.negative, am, value);
+  to_float(
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+      pns.negative,
+#endif
+      am, value);
  // Test for over/underflow.
  if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) ||
      am.power2 == binary_format<T>::infinite_power()) {
@ -292,38 +321,51 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_float_advanced(UC const *first, UC const *last, T &value,
-                          parse_options_t<UC> options) noexcept {
-
+                          parse_options_t<UC> const options) noexcept {
  static_assert(is_supported_float_type<T>::value,
                "only some floating-point types are supported");
  static_assert(is_supported_char_type<UC>::value,
                "only char, wchar_t, char16_t and char32_t are supported");

-  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
-
  from_chars_result_t<UC> answer;
-  if (uint64_t(fmt & chars_format::skip_white_space)) {
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  if (chars_format_t(options.format & chars_format::skip_white_space)) {
    while ((first != last) && fast_float::is_space(*first)) {
-      first++;
+      ++first;
    }
  }
+#else
+#ifdef FASTFLOAT_ISNOT_CHECKED_BOUNDS
+  // We are in parser code with external loop that checks bounds.
+  FASTFLOAT_ASSUME(first < last);
+#endif
+#endif
+#ifndef FASTFLOAT_ISNOT_CHECKED_BOUNDS
  if (first == last) {
    answer.ec = std::errc::invalid_argument;
    answer.ptr = first;
    return answer;
  }
-  parsed_number_string_t<UC> pns =
-      uint64_t(fmt & detail::basic_json_fmt)
+#endif
+  parsed_number_string_t<UC> const pns =
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+      (chars_format_t(options.format & detail::basic_json_fmt))
          ? parse_number_string<true, UC>(first, last, options)
-          : parse_number_string<false, UC>(first, last, options);
-  if (!pns.valid) {
-    if (uint64_t(fmt & chars_format::no_infnan)) {
+          :
+#endif
+          parse_number_string<false, UC>(first, last, options);
+  if (pns.invalid) {
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+    if (chars_format_t(options.format & chars_format::no_infnan)) {
+#endif
      answer.ec = std::errc::invalid_argument;
      answer.ptr = first;
      return answer;
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
    } else {
-      return detail::parse_infnan(first, last, value, fmt);
+      return detail::parse_infnan(first, last, value, options.format);
    }
+#endif
  }

  // call overload that takes parsed_number_string_t directly.
@ -332,55 +374,80 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,

 template <typename T, typename UC, typename>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
-from_chars(UC const *first, UC const *last, T &value, int base) noexcept {
-
+from_chars(UC const *first, UC const *last, T &value, int const base) noexcept {
  static_assert(is_supported_integer_type<T>::value,
                "only integer types are supported");
  static_assert(is_supported_char_type<UC>::value,
                "only char, wchar_t, char16_t and char32_t are supported");

-  parse_options_t<UC> options;
-  options.base = base;
+  parse_options_t<UC> const options(chars_format::general, UC('.'),
+                                    static_cast<base_t>(base));
  return from_chars_advanced(first, last, value, options);
 }

 template <typename T>
 FASTFLOAT_CONSTEXPR20
    typename std::enable_if<is_supported_float_type<T>::value, T>::type
-    integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
+    integer_times_pow10(uint64_t const mantissa,
+                        int const decimal_exponent) noexcept {
  T value;
-  if (clinger_fast_path_impl(mantissa, decimal_exponent, false, value))
+  const auto exponent = static_cast<am_pow_t>(decimal_exponent);
+  if (clinger_fast_path_impl(mantissa, exponent,
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+                             false,
+#endif
+                             value))
    return value;

-  adjusted_mantissa am =
-      compute_float<binary_format<T>>(decimal_exponent, mantissa);
-  to_float(false, am, value);
+  adjusted_mantissa am = compute_float<binary_format<T>>(exponent, mantissa);
+  to_float(
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+      false,
+#endif
+      am, value);
  return value;
 }

 template <typename T>
 FASTFLOAT_CONSTEXPR20
    typename std::enable_if<is_supported_float_type<T>::value, T>::type
-    integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
-  const bool is_negative = mantissa < 0;
-  const uint64_t m = static_cast<uint64_t>(is_negative ? -mantissa : mantissa);
-
+    integer_times_pow10(int64_t const mantissa,
+                        int const decimal_exponent) noexcept {
+  // Signed overload: splits the mantissa into sign and magnitude, tries the
+  // Clinger fast path first and otherwise falls back to compute_float
+  // followed by to_float. Returns the resulting value of type T.
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  const auto is_negative = mantissa < 0;
+  // Take the magnitude in the unsigned domain. Writing `-mantissa` on the
+  // signed value would overflow (undefined behavior) for INT64_MIN, whereas
+  // unsigned subtraction wraps and yields exactly 2^63 for that input.
+  const auto m = static_cast<am_mant_t>(
+      is_negative ? uint64_t(0) - static_cast<uint64_t>(mantissa)
+                  : static_cast<uint64_t>(mantissa));
+#else
+  FASTFLOAT_ASSUME(mantissa >= 0);
+  const auto m = static_cast<am_mant_t>(mantissa);
+#endif
+  const auto exponent = static_cast<am_pow_t>(decimal_exponent);
  T value;
-  if (clinger_fast_path_impl(m, decimal_exponent, is_negative, value))
+  if (clinger_fast_path_impl(m, exponent,
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+                             is_negative,
+#endif
+                             value))
    return value;

-  adjusted_mantissa am = compute_float<binary_format<T>>(decimal_exponent, m);
-  to_float(is_negative, am, value);
+  adjusted_mantissa const am = compute_float<binary_format<T>>(exponent, m);
+
+  to_float(
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+      is_negative,
+#endif
+      am, value);
  return value;
 }

 FASTFLOAT_CONSTEXPR20 inline double
-integer_times_pow10(uint64_t mantissa, int decimal_exponent) noexcept {
+integer_times_pow10(uint64_t const mantissa,
+                    int const decimal_exponent) noexcept {
  return integer_times_pow10<double>(mantissa, decimal_exponent);
 }

 FASTFLOAT_CONSTEXPR20 inline double
-integer_times_pow10(int64_t mantissa, int decimal_exponent) noexcept {
+integer_times_pow10(int64_t const mantissa,
+                    int const decimal_exponent) noexcept {
  return integer_times_pow10<double>(mantissa, decimal_exponent);
 }

@ -392,7 +459,8 @@ FASTFLOAT_CONSTEXPR20
                                std::is_integral<Int>::value &&
                                !std::is_signed<Int>::value,
                            T>::type
-    integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+    integer_times_pow10(Int const mantissa,
+                        int const decimal_exponent) noexcept {
  return integer_times_pow10<T>(static_cast<uint64_t>(mantissa),
                                decimal_exponent);
 }
@ -403,7 +471,8 @@ FASTFLOAT_CONSTEXPR20
                                std::is_integral<Int>::value &&
                                std::is_signed<Int>::value,
                            T>::type
-    integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+    integer_times_pow10(Int const mantissa,
+                        int const decimal_exponent) noexcept {
  return integer_times_pow10<T>(static_cast<int64_t>(mantissa),
                                decimal_exponent);
 }
@ -411,37 +480,44 @@ FASTFLOAT_CONSTEXPR20
 template <typename Int>
 FASTFLOAT_CONSTEXPR20 typename std::enable_if<
    std::is_integral<Int>::value && !std::is_signed<Int>::value, double>::type
-integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+integer_times_pow10(Int const mantissa, int const decimal_exponent) noexcept {
  return integer_times_pow10(static_cast<uint64_t>(mantissa), decimal_exponent);
 }

 template <typename Int>
 FASTFLOAT_CONSTEXPR20 typename std::enable_if<
    std::is_integral<Int>::value && std::is_signed<Int>::value, double>::type
-integer_times_pow10(Int mantissa, int decimal_exponent) noexcept {
+integer_times_pow10(Int const mantissa, int const decimal_exponent) noexcept {
  return integer_times_pow10(static_cast<int64_t>(mantissa), decimal_exponent);
 }

 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_int_advanced(UC const *first, UC const *last, T &value,
-                        parse_options_t<UC> options) noexcept {
-
+                        parse_options_t<UC> const options) noexcept {
  static_assert(is_supported_integer_type<T>::value,
                "only integer types are supported");
  static_assert(is_supported_char_type<UC>::value,
                "only char, wchar_t, char16_t and char32_t are supported");

-  chars_format const fmt = detail::adjust_for_feature_macros(options.format);
-  int const base = options.base;
-
-  from_chars_result_t<UC> answer;
-  if (uint64_t(fmt & chars_format::skip_white_space)) {
+#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
+  if (chars_format_t(options.format & chars_format::skip_white_space)) {
    while ((first != last) && fast_float::is_space(*first)) {
-      first++;
+      ++first;
    }
  }
-  if (first == last || base < 2 || base > 36) {
+#else
+#ifdef FASTFLOAT_ISNOT_CHECKED_BOUNDS
+  // We are in parser code with external loop that checks bounds.
+  FASTFLOAT_ASSUME(first < last);
+#endif
+#endif
+  if (
+#ifndef FASTFLOAT_ISNOT_CHECKED_BOUNDS
+      first == last ||
+#endif
+      options.base < 2 || options.base > 36) {
+    from_chars_result_t<UC> answer;
    answer.ec = std::errc::invalid_argument;
    answer.ptr = first;
    return answer;
@ -458,7 +534,7 @@ template <> struct from_chars_advanced_caller<1> {
  template <typename T, typename UC>
  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
  call(UC const *first, UC const *last, T &value,
-       parse_options_t<UC> options) noexcept {
+       parse_options_t<UC> const options) noexcept {
    return from_chars_float_advanced(first, last, value, options);
  }
 };
@ -467,7 +543,7 @@ template <> struct from_chars_advanced_caller<2> {
  template <typename T, typename UC>
  FASTFLOAT_CONSTEXPR20 static from_chars_result_t<UC>
  call(UC const *first, UC const *last, T &value,
-       parse_options_t<UC> options) noexcept {
+       parse_options_t<UC> const options) noexcept {
    return from_chars_int_advanced(first, last, value, options);
  }
 };
@ -475,7 +551,7 @@ template <> struct from_chars_advanced_caller<2> {
 template <typename T, typename UC>
 FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
 from_chars_advanced(UC const *first, UC const *last, T &value,
-                    parse_options_t<UC> options) noexcept {
+                    parse_options_t<UC> const options) noexcept {
  return from_chars_advanced_caller<
      size_t(is_supported_float_type<T>::value) +
      2 * size_t(is_supported_integer_type<T>::value)>::call(first, last, value,
--- a/tests/basictest.cpp
+++ b/tests/basictest.cpp
@ -69,7 +69,7 @@ template <typename T> std::string fHexAndDec(T v) {
  return ss.str();
 }

-char const *round_name(int d) {
+constexpr std::string_view const round_name(int d) {
  switch (d) {
  case FE_UPWARD:
    return "FE_UPWARD";
@ -2328,7 +2328,7 @@ TEST_CASE("integer_times_pow10") {

  for (int mode : {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, FE_TONEAREST}) {
    fesetround(mode);
-    INFO("fesetround(): " << std::string{round_name(mode)});
+    INFO("fesetround(): " << round_name(mode));

    struct Guard {
      ~Guard() { fesetround(FE_TONEAREST); }
--- a/tests/json_fmt.cpp
+++ b/tests/json_fmt.cpp
@ -136,7 +136,7 @@ int main() {
        fast_float::parse_options(
            fast_float::chars_format::json |
            fast_float::chars_format::allow_leading_plus)); // should be ignored
-    if (answer.valid) {
+    if (!answer.invalid) {
      std::cerr << "json parse accepted invalid json " << f << std::endl;
      return EXIT_FAILURE;
    }
@ -167,4 +167,4 @@ int main() {
 #endif

  return EXIT_SUCCESS;
-}
+}
--- a/tests/long_test.cpp
+++ b/tests/long_test.cpp
@ -51,7 +51,6 @@ template <typename T> bool test() {
 }

 int main() {
-
  std::cout << "32 bits checks" << std::endl;
  Assert(test<float>());

--- a/tests/random_string.cpp
+++ b/tests/random_string.cpp
@ -198,7 +198,7 @@ bool tester(uint64_t seed, size_t volume) {
  char buffer[4096]; // large buffer (can't overflow)
  RandomEngine rand(seed);
  for (size_t i = 0; i < volume; i++) {
-    if ((i % 100000) == 0) {
+    if ((i % 1000000) == 0) {
      std::cout << ".";
      std::cout.flush();
    }
@ -256,10 +256,12 @@ bool tester(uint64_t seed, size_t volume) {
 }

 int main() {
-
 #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) ||     \
    defined(sun) || defined(__sun)
-  std::cout << "Warning: msys/cygwin or solaris detected." << std::endl;
+  std::cout << "Warning: msys/cygwin or solaris detected. This particular test "
+               "is likely to generate false failures due to our reliance on "
+               "the underlying runtime library."
+            << std::endl;
  return EXIT_SUCCESS;
 #else
  if (tester(1234344, 100000000)) {
--- a/tests/rcppfastfloat_test.cpp
+++ b/tests/rcppfastfloat_test.cpp
@ -8,9 +8,9 @@
 #include <vector>

 struct test_data {
-  std::string input;
-  bool expected_success;
-  double expected_result;
+  const std::string input;
+  const bool expected_success;
+  const double expected_result;
 };

 bool eddelbuettel() {
@ -51,10 +51,10 @@ bool eddelbuettel() {
      {"-+inf", false, 0.0},
      {"-+nan", false, 0.0},
  };
-  for (size_t i = 0; i < test_datas.size(); i++) {
-    auto const &input = test_datas[i].input;
-    auto const expected_success = test_datas[i].expected_success;
-    auto const expected_result = test_datas[i].expected_result;
+  for (const auto &i : test_datas) {
+    auto const &input = i.input;
+    auto const expected_success = i.expected_success;
+    auto const expected_result = i.expected_result;
    double result;
    // answer contains an error code and a pointer to the end of the
    // parsed region (on success).
--- a/tests/short_random_string.cpp
+++ b/tests/short_random_string.cpp
@ -253,9 +253,9 @@ bool tester(uint64_t seed, size_t volume) {
 int main() {
 #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) ||     \
    defined(sun) || defined(__sun)
-  std::cout << "Warning: msys/cygwin detected. This particular test is likely "
-               "to generate false failures due to our reliance on the "
-               "underlying runtime library."
+  std::cout << "Warning: msys/cygwin or solaris detected. This particular test "
+               "is likely to generate false failures due to our reliance on "
+               "the underlying runtime library."
            << std::endl;
  return EXIT_SUCCESS;
 #else
@ -263,6 +263,7 @@ int main() {
    std::cout << "All tests ok." << std::endl;
    return EXIT_SUCCESS;
  }
+  std::cout << "Failure." << std::endl;
  return EXIT_FAILURE;

 #endif