Merge pull request #68 from fastfloat/dlemire/template_trick

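This uses the template trick (the power-of-five table becomes a static data member of a class template, `powers_template<>`) to ensure we get only one definition of the table.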
This commit is contained in:
Daniel Lemire 2021-04-08 14:36:47 -04:00 committed by GitHub
commit 6c97156f6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 10 deletions

View File

@ -20,18 +20,18 @@ namespace fast_float {
template <int bit_precision> template <int bit_precision>
fastfloat_really_inline fastfloat_really_inline
value128 compute_product_approximation(int64_t q, uint64_t w) { value128 compute_product_approximation(int64_t q, uint64_t w) {
const int index = 2 * int(q - smallest_power_of_five); const int index = 2 * int(q - powers::smallest_power_of_five);
// For small values of q, e.g., q in [0,27], the answer is always exact because // For small values of q, e.g., q in [0,27], the answer is always exact because
// The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]); // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]);
// gives the exact answer. // gives the exact answer.
value128 firstproduct = full_multiplication(w, power_of_five_128[index]); value128 firstproduct = full_multiplication(w, powers::power_of_five_128[index]);
static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]"); static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]");
constexpr uint64_t precision_mask = (bit_precision < 64) ? constexpr uint64_t precision_mask = (bit_precision < 64) ?
(uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision)
: uint64_t(0xFFFFFFFFFFFFFFFF); : uint64_t(0xFFFFFFFFFFFFFFFF);
if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower) if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower)
// regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed. // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed.
value128 secondproduct = full_multiplication(w, power_of_five_128[index + 1]); value128 secondproduct = full_multiplication(w, powers::power_of_five_128[index + 1]);
firstproduct.low += secondproduct.high; firstproduct.low += secondproduct.high;
if(secondproduct.high > firstproduct.low) { if(secondproduct.high > firstproduct.low) {
firstproduct.high++; firstproduct.high++;
@ -83,7 +83,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
answer.mantissa = 0; answer.mantissa = 0;
return answer; return answer;
} }
// At this point in time q is in [smallest_power_of_five, largest_power_of_five]. // At this point in time q is in [powers::smallest_power_of_five, powers::largest_power_of_five].
// We want the most significant bit of i to be 1. Shift if needed. // We want the most significant bit of i to be 1. Shift if needed.
int lz = leading_zeroes(w); int lz = leading_zeroes(w);

View File

@ -28,10 +28,18 @@ namespace fast_float {
* infinite in binary64 so we never need to worry about powers * infinite in binary64 so we never need to worry about powers
* of 5 greater than 308. * of 5 greater than 308.
*/ */
constexpr int smallest_power_of_five = -342; template <class unused = void>
constexpr int largest_power_of_five = 308; struct powers_template {
constexpr static int smallest_power_of_five = binary_format<double>::smallest_power_of_ten();
constexpr static int largest_power_of_five = binary_format<double>::largest_power_of_ten();
constexpr static int number_of_entries = 2 * (largest_power_of_five - smallest_power_of_five + 1);
// Powers of five from 5^-342 all the way to 5^308 rounded toward one. // Powers of five from 5^-342 all the way to 5^308 rounded toward one.
const uint64_t power_of_five_128[]= { static const uint64_t power_of_five_128[number_of_entries];
};
template <class unused>
const uint64_t powers_template<unused>::power_of_five_128[number_of_entries] = {
0xeef453d6923bd65a,0x113faa2906a13b3f, 0xeef453d6923bd65a,0x113faa2906a13b3f,
0x9558b4661b6565f8,0x4ac7ca59a424c507, 0x9558b4661b6565f8,0x4ac7ca59a424c507,
0xbaaee17fa23ebf76,0x5d79bcf00d2df649, 0xbaaee17fa23ebf76,0x5d79bcf00d2df649,
@ -683,6 +691,7 @@ const uint64_t power_of_five_128[]= {
0xb6472e511c81471d,0xe0133fe4adf8e952, 0xb6472e511c81471d,0xe0133fe4adf8e952,
0xe3d8f9e563a198e5,0x58180fddd97723a6, 0xe3d8f9e563a198e5,0x58180fddd97723a6,
0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,};
using powers = powers_template<>;
} }

View File

@ -273,14 +273,14 @@ adjusted_mantissa compute_float(decimal &d) {
} }
static const uint32_t max_shift = 60; static const uint32_t max_shift = 60;
static const uint32_t num_powers = 19; static const uint32_t num_powers = 19;
static const uint8_t powers[19] = { static const uint8_t decimal_powers[19] = {
0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, //
33, 36, 39, 43, 46, 49, 53, 56, 59, // 33, 36, 39, 43, 46, 49, 53, 56, 59, //
}; };
int32_t exp2 = 0; int32_t exp2 = 0;
while (d.decimal_point > 0) { while (d.decimal_point > 0) {
uint32_t n = uint32_t(d.decimal_point); uint32_t n = uint32_t(d.decimal_point);
uint32_t shift = (n < num_powers) ? powers[n] : max_shift; uint32_t shift = (n < num_powers) ? decimal_powers[n] : max_shift;
detail::decimal_right_shift(d, shift); detail::decimal_right_shift(d, shift);
if (d.decimal_point < -decimal_point_range) { if (d.decimal_point < -decimal_point_range) {
// should be zero // should be zero
@ -300,7 +300,7 @@ adjusted_mantissa compute_float(decimal &d) {
shift = (d.digits[0] < 2) ? 2 : 1; shift = (d.digits[0] < 2) ? 2 : 1;
} else { } else {
uint32_t n = uint32_t(-d.decimal_point); uint32_t n = uint32_t(-d.decimal_point);
shift = (n < num_powers) ? powers[n] : max_shift; shift = (n < num_powers) ? decimal_powers[n] : max_shift;
} }
detail::decimal_left_shift(d, shift); detail::decimal_left_shift(d, shift);
if (d.decimal_point > decimal_point_range) { if (d.decimal_point > decimal_point_range) {