From 8df999e44e333b6b4299ae51588774611f9e39cc Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 2 Nov 2020 15:01:15 -0500 Subject: [PATCH] One more range of tests. --- tests/CMakeLists.txt | 1 + tests/short_random_string.cpp | 211 ++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 tests/short_random_string.cpp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 31b42a5..c653145 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,6 +4,7 @@ function(fast_float_add_cpp_test TEST_NAME) add_test(${TEST_NAME} ${TEST_NAME}) target_link_libraries(${TEST_NAME} PUBLIC fast_float) endfunction(fast_float_add_cpp_test) +fast_float_add_cpp_test(short_random_string) fast_float_add_cpp_test(exhaustive32_midpoint) fast_float_add_cpp_test(random_string) fast_float_add_cpp_test(string_test) diff --git a/tests/short_random_string.cpp b/tests/short_random_string.cpp new file mode 100644 index 0000000..1ced98f --- /dev/null +++ b/tests/short_random_string.cpp @@ -0,0 +1,211 @@ +#include "fast_float/fast_float.h" +#include +#include + +#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) +// Anything at all that is related to cygwin, msys and so forth will +// always use this fallback because we cannot rely on it behaving as normal +// gcc. +#include +#include +// workaround for CYGWIN +double cygwin_strtod_l(const char* start, char** end) { + double d; + std::stringstream ss; + ss.imbue(std::locale::classic()); + ss << start; + ss >> d; + size_t nread = ss.tellg(); + *end = const_cast(start) + nread; + return d; +} +#endif + +class RandomEngine { +public: + RandomEngine() = delete; + RandomEngine(int new_seed) { wyhash64_x_ = new_seed; }; + uint64_t next() { + // Adapted from https://github.com/wangyi-fudan/wyhash/blob/master/wyhash.h + // Inspired from + // https://github.com/lemire/testingRNG/blob/master/source/wyhash.h + wyhash64_x_ += UINT64_C(0x60bee2bee120fc15); + fast_float::value128 tmp = fast_float::full_multiplication(wyhash64_x_, UINT64_C(0xa3b195354a39b70d)); + uint64_t m1 = (tmp.high) ^ tmp.low; + tmp = fast_float::full_multiplication(m1, UINT64_C(0x1b03738712fad5c9)); + uint64_t m2 = (tmp.high) ^ tmp.low; + return m2; + } + bool next_bool() { return (next() & 1) == 1; } + int next_int() { return static_cast(next()); } + char next_char() { return static_cast(next()); } + double next_double() { return static_cast(next()); } + + int next_ranged_int(int min, int max) { // min and max are included + // Adapted from + // https://lemire.me/blog/2019/06/06/nearly-divisionless-random-integer-generation-on-various-systems/ + /* if (min == max) { + return min; + }*/ + int s = max - min + 1; + uint64_t x = next(); + fast_float::value128 m = fast_float::full_multiplication(x, s); + uint64_t l = m.low; + if (l < s) { + uint64_t t = -s % s; + while (l < t) { + x = next(); + m = fast_float::full_multiplication(x, s); + l = m.low; + } + } + return (m.high) + min; + } + int next_digit() { return next_ranged_int(0, 9); } + +private: + uint64_t wyhash64_x_; +}; + +size_t build_random_string(RandomEngine &rand, char *buffer) { + size_t pos{0}; + if (rand.next_bool()) { + buffer[pos++] = '-'; + } + int number_of_digits = rand.next_ranged_int(1, 19); + int location_of_decimal_separator = rand.next_ranged_int(1, number_of_digits); + for (size_t i = 0; i < number_of_digits; i++) { + if (i == location_of_decimal_separator) { + buffer[pos++] = '.'; + } + buffer[pos++] = char(rand.next_digit() + '0'); + } + if (rand.next_bool()) { + if (rand.next_bool()) { + buffer[pos++] = 'e'; + } else { + buffer[pos++] = 'E'; + } + if (rand.next_bool()) { + buffer[pos++] = '-'; + } else { + if (rand.next_bool()) { + buffer[pos++] = '+'; + } + } + number_of_digits = rand.next_ranged_int(1, 3); + for (size_t i = 0; i < number_of_digits; i++) { + buffer[pos++] = char(rand.next_digit() + '0'); + } + } + buffer[pos] = '\0'; // null termination + return pos; +} + +std::pair strtod_from_string(char *st) { + double d; + char *pr; +#ifdef _WIN32 + static _locale_t c_locale = _create_locale(LC_ALL, "C"); + d = _strtod_l(st, &pr, c_locale); +#else + static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); + d = strtod_l(st, &pr, c_locale); +#endif + if (st == pr) { + std::cerr << "strtod_l could not parse '" << st << std::endl; + return std::make_pair(0, false); + } + return std::make_pair(d, true); +} + +std::pair strtof_from_string(char *st) { + float d; + char *pr; +#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) + d = cygwin_strtod_l(st, &pr); +#elif defined(_WIN32) + static _locale_t c_locale = _create_locale(LC_ALL, "C"); + d = _strtof_l(st, &pr, c_locale); +#else + static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); + d = strtof_l(st, &pr, c_locale); +#endif + if (st == pr) { + std::cerr << "strtof_l could not parse '" << st << std::endl; + return std::make_pair(0.0f, false); + } + return std::make_pair(d, true); +} + +/** + * We generate random strings and we try to parse them with both strtod/strtof, + * and we verify that we get the same answer with with fast_float::from_chars. + */ +bool tester(int seed, size_t volume) { + char buffer[4096]; // large buffer (can't overflow) + RandomEngine rand(seed); + for (size_t i = 0; i < volume; i++) { + if((i%1000000) == 0) { std::cout << "."; std::cout.flush(); } + size_t length = build_random_string(rand, buffer); + std::pair expected_double = strtod_from_string(buffer); + if (expected_double.second) { + double result_value; + auto result = + fast_float::from_chars(buffer, buffer + length, result_value); + if (result.ec != std::errc()) { + printf("parsing %.*s\n", int(length), buffer); + std::cerr << " I could not parse " << std::endl; + return false; + } + if (result.ptr != buffer + length) { + printf("parsing %.*s\n", int(length), buffer); + std::cerr << " Did not get to the end " << std::endl; + return false; + } + if (result_value != expected_double.first) { + printf("parsing %.*s\n", int(length), buffer); + std::cerr << std::hexfloat << result_value << std::endl; + std::cerr << std::hexfloat << expected_double.first << std::endl; + std::cerr << " Mismatch " << std::endl; + return false; + } + } + std::pair expected_float = strtof_from_string(buffer); + if (expected_float.second) { + float result_value; + auto result = + fast_float::from_chars(buffer, buffer + length, result_value); + if (result.ec != std::errc()) { + printf("parsing %.*s\n", int(length), buffer); + std::cerr << " I could not parse " << std::endl; + return false; + } + if (result.ptr != buffer + length) { + printf("parsing %.*s\n", int(length), buffer); + std::cerr << " Did not get to the end " << std::endl; + return false; + } + if (result_value != expected_float.first) { + printf("parsing %.*s\n", int(length), buffer); + std::cerr << std::hexfloat << result_value << std::endl; + std::cerr << std::hexfloat << expected_float.first << std::endl; + std::cerr << " Mismatch " << std::endl; + return false; + } + } + } + return true; +} + +int main() { +#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__MINGW64__) + std::cout << "Warning: msys/cygwin detected. This particular test is likely to generate false failures due to our reliance on the underlying runtime library." << std::endl; +#endif + if (tester(1234344, 100000000)) { + std::cout << "All tests ok." << std::endl; + return EXIT_SUCCESS; + } + std::cout << "Failure." << std::endl; + return EXIT_FAILURE; +} \ No newline at end of file