From 62ed60e95f338dfa625490f4b9575a480e08f154 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 21:08:59 -0500 Subject: [PATCH 1/5] simplify the benchmark --- benchmarks/CMakeLists.txt | 2 +- benchmarks/bench_ip.cpp | 135 ++++++++++++++++++++++++-------------- 2 files changed, 85 insertions(+), 52 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index cfa48b8..791cf61 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( counters GIT_REPOSITORY https://github.com/lemire/counters.git - GIT_TAG v2.0.0 + GIT_TAG v2.1.0 ) FetchContent_MakeAvailable(counters) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 782358e..771ebb8 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -26,6 +26,28 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } +const char *seek_ip_end(const char *p, const char *pend) { + const char *current = p; + size_t count = 0; + for (; current != pend; ++current) { + if (*current == '.') { + count++; + if (count == 3) { + ++current; + break; + } + } + } + while (current != pend) { + if (*current <= '9' && *current >= '0') { + ++current; + } else { + break; + } + } + return current; +} + int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { if (p == pend) return 0; @@ -39,8 +61,9 @@ int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { static inline int parse_u8_fromchars(const char *&p, const char *pend, uint8_t *out) { - if (p == pend) + if (p == pend) { return 0; + } auto r = std::from_chars(p, pend, *out); if (r.ec == std::errc()) { p = r.ptr; @@ -50,26 +73,35 @@ static inline int parse_u8_fromchars(const char *&p, const char *pend, } template -static inline int parse_ip_line(const char *&p, const char *pend, uint32_t &sum, - Parser parse_uint8) { - uint8_t o = 0; - for (int i = 0; i < 4; ++i) { - if (!parse_uint8(p, pend, &o)) - return 0; - sum += o; - if (i != 3) { - if (p == pend || *p != '.') - return 0; - ++p; - } +std::pair simple_parse_ip_line(const char *p, const char *pend, + Parser parse_uint8) { + uint8_t v1; + if (!parse_uint8(p, pend, &v1)) { + return {false, 0}; } - // consume optional '\r' - if (p != pend && *p == '\r') - ++p; - // expect '\n' or end - if (p != pend && *p == '\n') - ++p; - return 1; + if (p == pend || *p++ != '.') { + return {false, 0}; + } + uint8_t v2; + if (!parse_uint8(p, pend, &v2)) { + return {false, 0}; + } + if (p == pend || *p++ != '.') { + return {false, 0}; + } + uint8_t v3; + if (!parse_uint8(p, pend, &v3)) { + return {false, 0}; + } + if (p == pend || *p++ != '.') { + return {false, 0}; + } + uint8_t v4; + if (!parse_uint8(p, pend, &v4)) { + return {false, 0}; + } + return {true, (uint32_t(v1) << 24) | (uint32_t(v2) << 16) | + (uint32_t(v3) << 8) | uint32_t(v4)}; } static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { @@ -87,19 +119,22 @@ static std::string make_ip_line(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { } int main() { - constexpr size_t N = 500000; + constexpr size_t N = 15000; std::mt19937 rng(1234); std::uniform_int_distribution dist(0, 255); std::string buf; - buf.reserve(N * 16); + constexpr size_t ip_size = 16; + buf.reserve(N * ip_size); for (size_t i = 0; i < N; ++i) { uint8_t a = (uint8_t)dist(rng); uint8_t b = (uint8_t)dist(rng); uint8_t c = (uint8_t)dist(rng); uint8_t d = (uint8_t)dist(rng); - buf += make_ip_line(a, b, c, d); + std::string ip_line = make_ip_line(a, b, c, d); + ip_line.resize(ip_size, ' '); // pad to fixed size + buf.append(ip_line); } // sentinel to allow 4-byte loads at end @@ -108,30 +143,21 @@ int main() { const size_t bytes = buf.size() - 4; // exclude sentinel from throughput const size_t volume = N; - // validate correctness - { - const char *start = buf.data(); - const char *end = buf.data() + bytes; - const char *p = start; - const char *pend = end; - uint32_t sum = 0; - for (size_t i = 0; i < N; ++i) { - int ok = parse_ip_line(p, pend, sum, parse_u8_fromchars); - if (!ok) { - std::fprintf(stderr, "fromchars parse failed at line %zu\n", i); - std::abort(); - } - p = start; - pend = end; - ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat); - if (!ok) { - std::fprintf(stderr, "fastswar parse failed at line %zu\n", i); - std::abort(); - } - } - } + volatile uint32_t sink = 0; - uint32_t sink = 0; + pretty_print(volume, bytes, "just_seek_ip_end (no parse)", + counters::bench([&]() { + const char *p = buf.data(); + const char *pend = buf.data() + bytes; + uint32_t sum = 0; + int ok = 0; + for (size_t i = 0; i < N; ++i) { + const char *q = seek_ip_end(p, pend); + sum += (uint32_t)(q - p); + p += ip_size; + } + sink += sum; + })); pretty_print(volume, bytes, "parse_ip_std_fromchars", counters::bench([&]() { const char *p = buf.data(); @@ -139,9 +165,13 @@ int main() { uint32_t sum = 0; int ok = 0; for (size_t i = 0; i < N; ++i) { - ok = parse_ip_line(p, pend, sum, parse_u8_fromchars); - if (!ok) + auto [ok, ip] = + simple_parse_ip_line(p, pend, parse_u8_fromchars); + sum += ip; + if (!ok) { std::abort(); + } + p += ip_size; } sink += sum; })); @@ -152,13 +182,16 @@ int main() { uint32_t sum = 0; int ok = 0; for (size_t i = 0; i < N; ++i) { - ok = parse_ip_line(p, pend, sum, parse_u8_fastfloat); - if (!ok) + auto [ok, ip] = + simple_parse_ip_line(p, pend, parse_u8_fastfloat); + sum += ip; + if (!ok) { std::abort(); + } + p += ip_size; } sink += sum; })); - std::printf("sink=%u\n", sink); return EXIT_SUCCESS; } \ No newline at end of file From 55723db171209640190f27051a5d34b7fc4473e8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 22:01:50 -0500 Subject: [PATCH 2/5] add a memcpy baseline --- benchmarks/bench_ip.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 771ebb8..36993b8 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -13,12 +13,12 @@ void pretty_print(size_t volume, size_t bytes, std::string name, counters::event_aggregate agg) { printf("%-40s : ", name.c_str()); printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns()); - printf(" %5.1f Ma/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); - printf(" %5.2f ns/d ", agg.fastest_elapsed_ns() / volume); + printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns()); + printf(" %5.2f ns/ip ", agg.fastest_elapsed_ns() / volume); if (counters::event_collector().has_events()) { printf(" %5.2f GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns()); - printf(" %5.2f c/d ", agg.fastest_cycles() / volume); - printf(" %5.2f i/d ", agg.fastest_instructions() / volume); + printf(" %5.2f c/ip ", agg.fastest_cycles() / volume); + printf(" %5.2f i/ip ", agg.fastest_instructions() / volume); printf(" %5.2f c/b ", agg.fastest_cycles() / bytes); printf(" %5.2f i/b ", agg.fastest_instructions() / bytes); printf(" %5.2f i/c ", agg.fastest_instructions() / agg.fastest_cycles()); @@ -144,6 +144,13 @@ int main() { const size_t volume = N; volatile uint32_t sink = 0; + std::string buffer(ip_size * N, ' '); + + pretty_print(volume, bytes, "memcpy baseline", + counters::bench([&]() { + std::memcpy((char *)buffer.data(), buf.data(), bytes); + })); + pretty_print(volume, bytes, "just_seek_ip_end (no parse)", counters::bench([&]() { From b5ae54cb21a7b446fe35c3b41f4a738ed55fcaa2 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 22 Dec 2025 22:18:39 -0500 Subject: [PATCH 3/5] adding a memcpy benchmark and ensure inlining. --- benchmarks/bench_ip.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 36993b8..dbfe7cc 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -26,7 +26,7 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } -const char *seek_ip_end(const char *p, const char *pend) { +fastfloat_really_inline const char *seek_ip_end(const char *p, const char *pend) { const char *current = p; size_t count = 0; for (; current != pend; ++current) { @@ -48,7 +48,7 @@ const char *seek_ip_end(const char *p, const char *pend) { return current; } -int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { +fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { if (p == pend) return 0; auto r = fast_float::from_chars(p, pend, *out); @@ -59,7 +59,7 @@ int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { return 0; } -static inline int parse_u8_fromchars(const char *&p, const char *pend, +fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, uint8_t *out) { if (p == pend) { return 0; @@ -73,7 +73,7 @@ static inline int parse_u8_fromchars(const char *&p, const char *pend, } template -std::pair simple_parse_ip_line(const char *p, const char *pend, +fastfloat_really_inline std::pair simple_parse_ip_line(const char *p, const char *pend, Parser parse_uint8) { uint8_t v1; if (!parse_uint8(p, pend, &v1)) { From bfa7bccea197ec523f3a9966f8e5d5615dbcf490 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 23 Dec 2025 11:46:17 -0500 Subject: [PATCH 4/5] lint --- benchmarks/bench_ip.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index dbfe7cc..90b07fc 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -26,7 +26,8 @@ void pretty_print(size_t volume, size_t bytes, std::string name, printf("\n"); } -fastfloat_really_inline const char *seek_ip_end(const char *p, const char *pend) { +fastfloat_really_inline const char *seek_ip_end(const char *p, + const char *pend) { const char *current = p; size_t count = 0; for (; current != pend; ++current) { @@ -48,7 +49,8 @@ fastfloat_really_inline const char *seek_ip_end(const char *p, const char *pend) return current; } -fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, uint8_t *out) { +fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, + uint8_t *out) { if (p == pend) return 0; auto r = fast_float::from_chars(p, pend, *out); @@ -60,7 +62,7 @@ fastfloat_really_inline int parse_u8_fastfloat(const char *&p, const char *pend, } fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, - uint8_t *out) { + uint8_t *out) { if (p == pend) { return 0; } @@ -73,8 +75,8 @@ fastfloat_really_inline int parse_u8_fromchars(const char *&p, const char *pend, } template -fastfloat_really_inline std::pair simple_parse_ip_line(const char *p, const char *pend, - Parser parse_uint8) { +fastfloat_really_inline std::pair +simple_parse_ip_line(const char *p, const char *pend, Parser parse_uint8) { uint8_t v1; if (!parse_uint8(p, pend, &v1)) { return {false, 0}; @@ -146,12 +148,10 @@ int main() { volatile uint32_t sink = 0; std::string buffer(ip_size * N, ' '); - pretty_print(volume, bytes, "memcpy baseline", - counters::bench([&]() { - std::memcpy((char *)buffer.data(), buf.data(), bytes); + pretty_print(volume, bytes, "memcpy baseline", counters::bench([&]() { + std::memcpy((char *)buffer.data(), buf.data(), bytes); })); - pretty_print(volume, bytes, "just_seek_ip_end (no parse)", counters::bench([&]() { const char *p = buf.data(); From 75d01f02e2f520b3f1d9037085023306d857771c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 23 Dec 2025 12:07:15 -0500 Subject: [PATCH 5/5] display the inner count (check) --- benchmarks/bench_ip.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmarks/bench_ip.cpp b/benchmarks/bench_ip.cpp index 90b07fc..761ebc1 100644 --- a/benchmarks/bench_ip.cpp +++ b/benchmarks/bench_ip.cpp @@ -11,6 +11,9 @@ void pretty_print(size_t volume, size_t bytes, std::string name, counters::event_aggregate agg) { + if (agg.inner_count > 1) { + printf("# (inner count: %d)\n", agg.inner_count); + } printf("%-40s : ", name.c_str()); printf(" %5.2f GB/s ", bytes / agg.fastest_elapsed_ns()); printf(" %5.1f Mip/s ", volume * 1000.0 / agg.fastest_elapsed_ns());