From 1afba556e344360c403ea060b041fa27032fa4b4 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Tue, 17 Nov 2020 21:55:01 -0500
Subject: [PATCH] Extending the fast path.

---
 include/fast_float/decimal_to_binary.h | 43 +++++++++++-
 tests/CMakeLists.txt                   |  1 +
 tests/powersoffive_hardround.cpp       | 94 ++++++++++++++++++++++++++
 3 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 tests/powersoffive_hardround.cpp

diff --git a/include/fast_float/decimal_to_binary.h b/include/fast_float/decimal_to_binary.h
index 68ab079..0b2741e 100644
--- a/include/fast_float/decimal_to_binary.h
+++ b/include/fast_float/decimal_to_binary.h
@@ -60,8 +60,40 @@ namespace {
   fastfloat_really_inline int power(int q)  noexcept  {
     return (((152170 + 65536) * q) >> 16) + 63;
   }
+  // Checks whether w is divisible by 5**-q. If it returns true, then
+  // w is definitively divisible by 5**-q.
+  inline bool is_divisible(int64_t q, uint64_t w)  noexcept  {
+    if((q>=-18) || (q<-27)) { return false; }
+    int64_t pos_q = -q;
+    // For each pair, first entry is the multiplicative inverse of 5**-q
+    // and the second one is the largest quotient.
+    //
+    // This could be more efficient by using...
+    // Faster remainder by direct computation: Applications to compilers and software libraries
+    // Software: Practice and Experience 49 (6), 2019.
+    // but the following is simple enough.
+    constexpr static uint64_t table[10][2] = {
+     {0xc1773b91fac10669,0x49c977}, // inverse of 5**18
+     {0x26b172506559ce15,0xec1e4}, // inverse of 5**19
+     {0xd489e3a9addec2d1,0x2f394}, // inverse of 5**20
+     {0x90e860bb892c8d5d,0x971d}, // inverse of 5**21
+     {0x502e79bf1b6f4f79,0x1e39}, // inverse of 5**22
+     {0xdcd618596be30fe5,0x60b}, // inverse of 5**23
+     {0x2c2ad1ab7bfa3661,0x135}, // inverse of 5**24
+     {0x8d55d224bfed7ad,0x3d}, // inverse of 5**25
+     {0x1c445d3a8cc9189,0xc}, // inverse of 5**26
+     {0xcd27412a54f5b6b5,0x2}, // inverse of 5**27
+    };
+    uint64_t inverse = table[pos_q-18][0];
+    uint64_t threshold = table[pos_q-18][1];
+    uint64_t product = w * inverse;
+    if(product > threshold) { return false; }
+    return true;
+  }
 } // namespace
 
+
+ 
 // w * 10 ** q
 // The returned value should be a valid ieee64 number that simply need to be packed.
 // However, in some very rare cases, the computation will fail. In such cases, we
@@ -93,12 +125,19 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   // 1. We need the implicit bit
   // 2. We need an extra bit for rounding purposes
   // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift)
+
   value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
   if(product.low == 0xFFFFFFFFFFFFFFFF) { //  could guard it further
     // In some very rare cases, this could happen, in which case we might need a more accurate
     // computation that what we can provide cheaply. This is very, very unlikely.
-    answer.power2 = -1; // This (a negative value) indicates an error condition.
-    return answer;
+    //
+    // There is still a chance to recover. If w is divisible by 5**-q, 
+    if(!is_divisible(q,w)) {
+      answer.power2 = -1; // This (a negative value) indicates an error condition.
+      return answer;
+    }
+    product.low += 1;
+    product.high += 1;
   }
   // The "compute_product_approximation" function can be slightly slower than a branchless approach:
   // value128 product = compute_product(q, w);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 0b32b52..74cfbad 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ function(fast_float_add_cpp_test TEST_NAME)
     endif()
     target_link_libraries(${TEST_NAME}  PUBLIC fast_float)
 endfunction(fast_float_add_cpp_test)
+fast_float_add_cpp_test(powersoffive_hardround)
 fast_float_add_cpp_test(short_random_string)
 fast_float_add_cpp_test(exhaustive32_midpoint)
 fast_float_add_cpp_test(random_string)
diff --git a/tests/powersoffive_hardround.cpp b/tests/powersoffive_hardround.cpp
new file mode 100644
index 0000000..eea82bf
--- /dev/null
+++ b/tests/powersoffive_hardround.cpp
@@ -0,0 +1,94 @@
+#include "fast_float/fast_float.h"
+
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <vector>
+
+std::pair<double, bool> strtod_from_string(const char *st) {
+  double d;
+  char *pr;
+#ifdef _WIN32
+  static _locale_t c_locale = _create_locale(LC_ALL, "C");
+  d = _strtod_l(st, &pr, c_locale);
+#else
+  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
+  d = strtod_l(st, &pr, c_locale);
+#endif
+  if (st == pr) {
+    std::cerr << "strtod_l could not parse '" << st << std::endl;
+    return std::make_pair(0, false);
+  }
+  return std::make_pair(d, true);
+}
+
+std::pair<float, bool> strtof_from_string(char *st) {
+  float d;
+  char *pr;
+#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__)
+  d = cygwin_strtod_l(st, &pr);
+#elif defined(_WIN32)
+  static _locale_t c_locale = _create_locale(LC_ALL, "C");
+  d = _strtof_l(st, &pr, c_locale);
+#else
+  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
+  d = strtof_l(st, &pr, c_locale);
+#endif
+  if (st == pr) {
+    std::cerr << "strtof_l could not parse '" << st << std::endl;
+    return std::make_pair(0.0f, false);
+  }
+  return std::make_pair(d, true);
+}
+
+bool tester() {
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  for (int q = 18; q <= 27; q++) {
+    std::cout << "q = " << -q << std::endl;
+    uint64_t power5 = 1;
+    for (int k = 0; k < q; k++) {
+      power5 *= 5;
+    }
+    uint64_t low_threshold = 0x20000000000000 / power5 + 1;
+    uint64_t threshold = 0xFFFFFFFFFFFFFFFF / power5;
+    std::uniform_int_distribution<uint64_t> dis(low_threshold, threshold);
+    for (size_t i = 0; i < 10000; i++) {
+      uint64_t mantissa = dis(gen) * power5;
+      std::stringstream ss;
+      ss << mantissa;
+      ss << "e";
+      ss << -q;
+      std::string to_be_parsed = ss.str();
+      std::pair<double, bool> expected_double =
+          strtod_from_string(to_be_parsed.c_str());
+      double result_value;
+      auto result =
+          fast_float::from_chars(to_be_parsed.data(), to_be_parsed.data() + to_be_parsed.size(), result_value);
+      if (result.ec != std::errc()) {
+        std::cout << to_be_parsed << std::endl;
+        std::cerr << " I could not parse " << std::endl;
+        return false;
+      }
+      if (result_value != expected_double.first) {
+        std::cout << to_be_parsed << std::endl;
+        std::cerr << std::hexfloat << result_value << std::endl;
+        std::cerr << std::hexfloat << expected_double.first << std::endl;
+        std::cerr << " Mismatch " << std::endl;
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+int main() {
+  if (tester()) {
+    std::cout << std::endl;
+    std::cout << "all ok" << std::endl;
+    return EXIT_SUCCESS;
+  }
+  std::cerr << std::endl;
+  std::cerr << "errors were encountered" << std::endl;
+  return EXIT_FAILURE;
+}