Merge pull request #349 from shikharish/uint8

uint8_t parsing
This commit is contained in:
Daniel Lemire 2025-12-24 18:37:11 -05:00 committed by GitHub
commit 1ad224e42c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 226 additions and 0 deletions

View File

@ -509,6 +509,92 @@ parse_int_string(UC const *p, UC const *pend, T &value,
UC const *const start_digits = p; UC const *const start_digits = p;
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value)) {
  // Fast path for std::uint8_t in base 10: a valid value has at most three
  // digits, so the first four input bytes are packed into one 32-bit word and
  // validated/converted with a handful of bitwise operations.
  // Any other base falls through to the generic parsing code that follows
  // this block; the digit classification below only understands '0'..'9'.
  // `first`, `has_leading_zeros` and `answer` come from the enclosing
  // function (outside this block).
  if (base == 10) {
    const size_t len = (size_t)(pend - p);
    if (len == 0) {
      // Nothing left to read: only acceptable if zeros were already consumed.
      if (has_leading_zeros) {
        value = 0;
        answer.ec = std::errc();
        answer.ptr = p;
      } else {
        answer.ec = std::errc::invalid_argument;
        answer.ptr = first;
      }
      return answer;
    }
    // Pack up to four bytes so that p[0] is ALWAYS the least-significant
    // byte, whatever the host endianness: every step below (trailing-zero
    // count, shift, multiply) relies on that layout.
    uint32_t digits = 0;
    if (cpp20_and_in_constexpr()) {
      // Plain shifts keep this branch usable in constant evaluation; bytes
      // beyond `len` stay 0, which is classified as a non-digit.
      for (size_t j = 0; j < 4 && j < len; ++j) {
        digits |= static_cast<uint32_t>(static_cast<uint8_t>(p[j])) << (8 * j);
      }
    } else if (len >= 4) {
      // NOTE(review): copies 4 raw bytes regardless of sizeof(UC) -- confirm
      // the intended behavior for wide character types.
      memcpy(&digits, p, 4);
#if FASTFLOAT_IS_BIG_ENDIAN
      // memcpy placed p[0] in the most-significant byte; swap to the
      // little-endian layout the arithmetic below expects.
      digits = (digits >> 24) | ((digits >> 8) & 0x0000ff00u) |
               ((digits << 8) & 0x00ff0000u) | (digits << 24);
#endif
    } else {
      // Fewer than four bytes available: pad with 0xFF, a non-digit, so the
      // digit count stops at the real end of input.
      uint32_t b0 = static_cast<uint8_t>(p[0]);
      uint32_t b1 = (len > 1) ? static_cast<uint8_t>(p[1]) : 0xFFu;
      uint32_t b2 = (len > 2) ? static_cast<uint8_t>(p[2]) : 0xFFu;
      uint32_t b3 = 0xFFu;
      digits = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
    }
    // Per byte: adding 0x46 sets the high bit when the byte is above '9',
    // subtracting 0x30 sets it when the byte is below '0'. Bytes that are
    // digits generate neither carry nor borrow, so the lowest flagged byte is
    // exactly the first non-digit.
    uint32_t magic =
        ((digits + 0x46464646u) | (digits - 0x30303030u)) & 0x80808080u;
    uint32_t tz = (uint32_t)countr_zero_32(magic); // 7, 15, 23, 31, or 32
    uint32_t nd = (tz == 32) ? 4 : (tz >> 3);      // number of leading digits
    nd = (uint32_t)std::min((size_t)nd, len);
    if (nd == 0) {
      // No digit at p: same outcome as the empty-input case above.
      if (has_leading_zeros) {
        value = 0;
        answer.ec = std::errc();
        answer.ptr = p;
        return answer;
      }
      answer.ec = std::errc::invalid_argument;
      answer.ptr = first;
      return answer;
    }
    if (nd > 3) {
      // Four or more digits can never fit in a uint8_t: consume the rest of
      // the digit run so ptr ends up just past it, then report overflow.
      const UC *q = p + nd;
      size_t rem = len - nd;
      while (rem) {
        if (*q < UC('0') || *q > UC('9'))
          break;
        ++q;
        --rem;
      }
      answer.ec = std::errc::result_out_of_range;
      answer.ptr = q;
      return answer;
    }
    // ASCII -> digit values, then shift so the last digit (ones) lands in the
    // top byte; bytes past the digit run are shifted out.
    digits ^= 0x30303030u;
    digits <<= ((4 - nd) * 8);
    // Rearranged to 0x00HHTTOO (hundreds, tens, ones): since each byte is a
    // single decimal digit, comparing against 0x020505 is comparing to 255.
    uint32_t check = ((digits >> 24) & 0xff) | ((digits >> 8) & 0xff00) |
                     ((digits << 8) & 0xff0000);
    if (check > 0x00020505) {
      answer.ec = std::errc::result_out_of_range;
      answer.ptr = p + nd;
      return answer;
    }
    // 0x640a01 = 100<<16 | 10<<8 | 1: the multiply accumulates
    // ones + 10*tens + 100*hundreds into the top byte of the product.
    value = (uint8_t)((0x640a01 * digits) >> 24);
    answer.ec = std::errc();
    answer.ptr = p + nd;
    return answer;
  }
}
uint64_t i = 0; uint64_t i = 0;
if (base == 10) { if (base == 10) {
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible

View File

@ -362,6 +362,52 @@ leading_zeroes(uint64_t input_num) {
#endif #endif
} }
/* Portable, C++14-constexpr-friendly count of trailing zero bits in a 32-bit
 * value. Returns 32 for a zero input. Works by testing progressively narrower
 * low-order windows (16, 8, 4, 2, 1 bits) and discarding each all-zero one. */
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int
countr_zero_generic_32(uint32_t input_num) {
  if (input_num == 0) {
    return 32;
  }
  int zero_count = 0;
  for (int width = 16; width != 0; width >>= 1) {
    const uint32_t low_mask = (uint32_t(1) << width) - 1;
    if ((input_num & low_mask) == 0) {
      // The whole low window is zero: account for it and drop it.
      input_num >>= width;
      zero_count += width;
    }
  }
  return zero_count;
}
/* Number of trailing zero bits in a 32-bit integer; yields 32 when the input
 * is zero. Uses the generic routine during constant evaluation and a compiler
 * intrinsic otherwise. */
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int
countr_zero_32(uint32_t input_num) {
  if (cpp20_and_in_constexpr()) {
    return countr_zero_generic_32(input_num);
  }
#ifdef FASTFLOAT_VISUAL_STUDIO
  // _BitScanForward returns zero when no bit is set in the mask.
  unsigned long bit_index = 0;
  const bool any_bit_set = _BitScanForward(&bit_index, input_num) != 0;
  return any_bit_set ? (int)bit_index : 32;
#else
  // __builtin_ctz is undefined for 0, so that case is answered up front.
  if (input_num == 0) {
    return 32;
  }
  return __builtin_ctz(input_num);
#endif
}
// slow emulation routine for 32-bit // slow emulation routine for 32-bit
fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
return x * (uint64_t)y; return x * (uint64_t)y;

View File

@ -94,6 +94,7 @@ endif()
option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF) option(FASTFLOAT_EXHAUSTIVE "Exhaustive tests" OFF)
if (FASTFLOAT_EXHAUSTIVE) if (FASTFLOAT_EXHAUSTIVE)
fast_float_add_cpp_test(ipv4_test)
fast_float_add_cpp_test(short_random_string) fast_float_add_cpp_test(short_random_string)
fast_float_add_cpp_test(exhaustive32_midpoint) fast_float_add_cpp_test(exhaustive32_midpoint)
fast_float_add_cpp_test(random_string) fast_float_add_cpp_test(random_string)

93
tests/ipv4_test.cpp Normal file
View File

@ -0,0 +1,93 @@
#include <algorithm>
#include <charconv>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include "fast_float/fast_float.h"
// Writes the decimal representation of `value` (no leading zeros, no
// terminator) starting at `ptr` and returns the position just past the last
// character written. At most three characters are produced.
char *uint8_to_chars_manual(char *ptr, uint8_t value) {
  // Collect digits least-significant first, then emit them in reading order.
  char reversed[3];
  int count = 0;
  do {
    reversed[count++] = static_cast<char>('0' + value % 10);
    value /= 10;
  } while (value != 0);
  while (count > 0) {
    *ptr++ = reversed[--count];
  }
  return ptr;
}
// Formats `ip` as dotted-decimal text ("a.b.c.d", most-significant byte first)
// into `buffer` and NUL-terminates it. The longest output,
// "255.255.255.255" plus the terminator, needs 16 bytes.
void uint32_to_ipv4_string(uint32_t ip, char *buffer) {
  char *out = buffer;
  // Walk the four bytes from the most-significant (shift 24) down to shift 0.
  for (int shift = 24; shift >= 0; shift -= 8) {
    out = uint8_to_chars_manual(out, static_cast<uint8_t>(ip >> shift));
    if (shift != 0) {
      *out++ = '.';
    }
  }
  *out = '\0';
}
// Parses the dotted-decimal IPv4 text in [str, end) into a 32-bit value with
// the first octet in the most-significant byte. Each octet is read with
// fast_float::from_chars into a uint8_t.
// Throws std::invalid_argument when an octet fails to parse, a '.' separator
// is missing, or any characters remain after the fourth octet.
fastfloat_really_inline uint32_t ipv4_string_to_uint32(const char *str,
                                                       const char *end) {
  uint32_t ip = 0;
  const char *current = str;
  for (int i = 0; i < 4; ++i) {
    uint8_t value = 0;
    auto r = fast_float::from_chars(current, end, value);
    if (r.ec != std::errc()) {
      throw std::invalid_argument("Invalid IP address format");
    }
    current = r.ptr;
    ip = (ip << 8) | value;
    if (i < 3) {
      // The first three octets must each be followed by a dot.
      if (current == end || *current++ != '.') {
        throw std::invalid_argument("Invalid IP address format");
      }
    }
  }
  // Reject trailing input such as "1.2.3.4.5" or "1.2.3.4x": the whole range
  // must be consumed for the text to be a valid address.
  if (current != end) {
    throw std::invalid_argument("Invalid IP address format");
  }
  return ip;
}
// Round-trips every 1000th 32-bit address through uint32_to_ipv4_string and
// ipv4_string_to_uint32. Prints a progress dot each time the address is a
// multiple of 10,000,000. Returns false (after reporting on stderr) at the
// first mismatch, true when every sampled address survives the round trip.
bool test_all_ipv4_conversions() {
  std::cout << "Testing all IPv4 conversions... 0, 1000, 2000, 3000, 4000, "
               "5000, 6000, 7000, 8000, 9000, ..."
            << std::endl;
  char text[16];
  // A 64-bit counter lets the loop step past 0xFFFFFFFF and terminate instead
  // of wrapping around.
  uint64_t candidate = 0;
  while (candidate <= 0xFFFFFFFF) {
    if (candidate % 10000000 == 0) {
      std::cout << "." << std::flush;
    }
    uint32_to_ipv4_string(static_cast<uint32_t>(candidate), text);
    const char *text_end = text + strlen(text);
    const uint32_t round_trip = ipv4_string_to_uint32(text, text_end);
    if (round_trip != candidate) {
      std::cerr << "Mismatch: original " << candidate << ", parsed "
                << round_trip << std::endl;
      return false;
    }
    candidate += 1000;
  }
  std::cout << std::endl;
  return true;
}
// Runs the IPv4 round-trip test and maps its outcome to the process exit code.
int main() {
  const bool passed = test_all_ipv4_conversions();
  if (!passed) {
    std::cerr << "IPv4 conversion test failed!" << std::endl;
    return EXIT_FAILURE;
  }
  std::cout << "All IPv4 conversions passed!" << std::endl;
  return EXIT_SUCCESS;
}