Merge pull request #391 from sahvx655-wq/int-fast-path-wide-units

reject non-digit wide code units in uint8/uint16 integer fast path
This commit is contained in:
Daniel Lemire 2026-06-11 20:28:10 -04:00 committed by GitHub
commit 0dce102cb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 61 additions and 2 deletions

View File

@ -600,7 +600,8 @@ parse_int_string(UC const *p, UC const *pend, T &value,
UC const *const start_digits = p;
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value)) {
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint8_t>::value &&
sizeof(UC) == 1)) {
if (base == 10) {
const size_t len = (size_t)(pend - p);
if (len == 0) {
@ -692,7 +693,8 @@ parse_int_string(UC const *p, UC const *pend, T &value,
}
}
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value)) {
FASTFLOAT_IF_CONSTEXPR17((std::is_same<T, std::uint16_t>::value &&
sizeof(UC) == 1)) {
if (base == 10) {
const size_t len = size_t(pend - p);
if (len == 0) {

View File

@ -1295,6 +1295,63 @@ int main() {
return EXIT_FAILURE;
}
}
// The uint8_t and uint16_t base-10 paths use a byte-oriented fast path. A
// wider code unit whose low byte is an ASCII digit (e.g. U+2131..U+2139) must
// not be mistaken for that digit. The generic path already rejects these for
// int; the fixed-width fast paths must agree. Lengths of 1..5 exercise both
// the uint8_t path and the 4-digit uint16_t SWAR path.
{
  // One input per length 1..5, built from U+2131..U+2135, whose low bytes are
  // 0x31..0x35, i.e. ASCII '1'..'5'. None of these code units is a digit, so
  // every parse below must fail. An empty string is deliberately NOT used as
  // the shortest case: it is rejected for having no input at all and would not
  // exercise the wide-code-unit check.
  const std::u16string bad16[] = {
      u"ℱ",
      u"ℱℲ",
      u"ℱℲℳ",
      u"ℱℲℳℴ",
      u"ℱℲℳℴℵ",
  };
  // Same inputs as 32-bit code units; kept in lockstep with bad16 so both
  // code-unit widths cover the same lengths.
  const std::u32string bad32[] = {
      U"ℱ",
      U"ℱℲ",
      U"ℱℲℳ",
      U"ℱℲℳℴ",
      U"ℱℲℳℴℵ",
  };
  bool failed = false;
  for (auto const &s : bad16) {
    // 123 is a sentinel initial value; only the returned error code is checked.
    uint8_t r8 = 123;
    auto a8 = fast_float::from_chars(s.data(), s.data() + s.size(), r8);
    if (a8.ec == std::errc()) {
      failed = true;
      // unsigned(...) so the value is printed as a number, not as a character.
      std::cerr << "Incorrectly parsed wide units as uint8_t " << unsigned(r8)
                << "." << std::endl;
    }
    uint16_t r16 = 123;
    auto a16 = fast_float::from_chars(s.data(), s.data() + s.size(), r16);
    if (a16.ec == std::errc()) {
      failed = true;
      std::cerr << "Incorrectly parsed wide units as uint16_t " << r16 << "."
                << std::endl;
    }
  }
  for (auto const &s : bad32) {
    uint8_t r8 = 123;
    auto a8 = fast_float::from_chars(s.data(), s.data() + s.size(), r8);
    if (a8.ec == std::errc()) {
      failed = true;
      std::cerr << "Incorrectly parsed wide units as uint8_t " << unsigned(r8)
                << "." << std::endl;
    }
    uint16_t r16 = 123;
    auto a16 = fast_float::from_chars(s.data(), s.data() + s.size(), r16);
    if (a16.ec == std::errc()) {
      failed = true;
      std::cerr << "Incorrectly parsed wide units as uint16_t " << r16 << "."
                << std::endl;
    }
  }
  // Report every offending input before failing, rather than stopping at the
  // first one.
  if (failed) {
    return EXIT_FAILURE;
  }
}
return EXIT_SUCCESS;
}