This commit is contained in:
xxx 2025-11-03 22:15:37 +08:00
parent 33ad559eb8
commit 89c98fa328
3 changed files with 84 additions and 0 deletions

View File

@ -677,6 +677,7 @@ enum class presentation_type : unsigned char {
// String and pointer specifiers:
pointer = 3, // 'p'
u8replace, // 'u' for UTF-8 replace
// Floating-point specifiers:
exp = 1, // 'e' or 'E' (1 since there is no FP debug presentation)
@ -1576,6 +1577,9 @@ FMT_CONSTEXPR auto parse_format_specs(const Char* begin, const Char* end,
bool_set | string_set | cstring_set);
case 'p':
return parse_presentation_type(pres::pointer, pointer_set | cstring_set);
case 'u':
return parse_presentation_type(pres::u8replace,
string_set | cstring_set);
case '?':
return parse_presentation_type(pres::debug,
char_set | string_set | cstring_set);

View File

@ -1422,6 +1422,61 @@ FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) {
buffer_.push_back(0);
}
namespace {
// Global UTF-8 error handler.
std::atomic<fmt::utf8::error_handler> global_utf8_error_handler;
} // namespace
// Returns a copy of `s` in which every unit that for_each_codepoint reports
// as invalid_code_point is replaced by the UTF-8 encoding of `replacement`;
// all other code points are copied through unchanged.
//
// `replacement` must be a Unicode scalar value to be encodable. Surrogates
// (U+D800..U+DFFF) and values above U+10FFFF have no well-formed UTF-8
// encoding, so U+FFFD REPLACEMENT CHARACTER is used in their place rather
// than emitting nothing or an ill-formed sequence.
FMT_FUNC auto fmt::utf8::replace_invalid(string_view s, char32_t replacement) -> std::string {
  const bool is_scalar_value =
      replacement <= 0x10FFFF &&
      !(replacement >= 0xD800 && replacement <= 0xDFFF);
  const uint32_t rc =
      is_scalar_value ? static_cast<uint32_t>(replacement) : 0xFFFDu;

  // Encode the replacement once up front instead of for every invalid unit.
  char encoded[4] = {};
  size_t encoded_size = 0;
  if (rc <= 0x7F) {
    encoded[encoded_size++] = static_cast<char>(rc);
  } else if (rc <= 0x7FF) {
    encoded[encoded_size++] = static_cast<char>(0xC0 | (rc >> 6));
    encoded[encoded_size++] = static_cast<char>(0x80 | (rc & 0x3F));
  } else if (rc <= 0xFFFF) {
    encoded[encoded_size++] = static_cast<char>(0xE0 | (rc >> 12));
    encoded[encoded_size++] = static_cast<char>(0x80 | ((rc >> 6) & 0x3F));
    encoded[encoded_size++] = static_cast<char>(0x80 | (rc & 0x3F));
  } else {
    encoded[encoded_size++] = static_cast<char>(0xF0 | (rc >> 18));
    encoded[encoded_size++] = static_cast<char>(0x80 | ((rc >> 12) & 0x3F));
    encoded[encoded_size++] = static_cast<char>(0x80 | ((rc >> 6) & 0x3F));
    encoded[encoded_size++] = static_cast<char>(0x80 | (rc & 0x3F));
  }
  const string_view encoded_view(encoded, encoded_size);

  auto buffer = detail::memory_buffer();
  for_each_codepoint(s, [&buffer, encoded_view](uint32_t cp, string_view sv) {
    // Invalid units become the replacement; valid code points are copied
    // as-is.
    buffer.append(cp == invalid_code_point ? encoded_view : sv);
    return true;  // Keep iterating over the whole input.
  });
  return to_string(buffer);
}
// Returns a copy of `s` in which every unit that for_each_codepoint reports
// as invalid_code_point is substituted with the `replacement` string; valid
// code points are copied through untouched.
FMT_FUNC auto fmt::utf8::replace_invalid(string_view s, string_view replacement) -> std::string {
  auto out = detail::memory_buffer();
  const auto emit = [&out, replacement](uint32_t code_point,
                                        string_view bytes) {
    const bool is_valid = code_point != invalid_code_point;
    // Pass valid input through; swap anything invalid for the replacement.
    out.append(is_valid ? bytes : replacement);
    return true;  // Never stop early: the whole input is processed.
  };
  for_each_codepoint(s, emit);
  return to_string(out);
}
// Installs `handler` as the global UTF-8 error handler and hands back the
// handler that was registered before this call.
FMT_FUNC auto fmt::utf8::set_error_handler(error_handler handler) -> error_handler {
  error_handler previous =
      global_utf8_error_handler.exchange(std::move(handler));
  return previous;
}
FMT_FUNC void format_system_error(detail::buffer<char>& out, int error_code,
const char* message) noexcept {
FMT_TRY {

View File

@ -197,6 +197,22 @@ template <typename Char, typename Traits, typename Allocator>
struct is_contiguous<std::basic_string<Char, Traits, Allocator>>
: std::true_type {};
namespace utf8 {
// Returns a copy of `s` with invalid UTF-8 byte sequences replaced by the
// UTF-8 encoding of `replacement`. The default is U+FFFD REPLACEMENT
// CHARACTER, spelled as a universal-character-name so the literal does not
// depend on the encoding of this source file.
FMT_API auto replace_invalid(string_view s, char32_t replacement = U'\uFFFD') -> std::string;

// Returns a copy of `s` with invalid UTF-8 byte sequences replaced by the
// `replacement` string.
FMT_API auto replace_invalid(string_view s, string_view replacement) -> std::string;

// A function type for custom UTF-8 error handlers.
// NOTE(review): the meaning of `cp` and `sv` is not established by any caller
// visible here - presumably the offending code point and its bytes; confirm
// once the handler is actually invoked somewhere.
using error_handler = std::function<void(uint32_t cp, string_view sv)>;

// Register a custom UTF-8 error handler. Returns the previous handler.
FMT_API auto set_error_handler(error_handler handler) -> error_handler;
}  // namespace utf8
namespace detail {
// __builtin_clz is broken in clang with Microsoft codegen:
@ -2171,6 +2187,15 @@ template <typename Char, typename OutputIt,
FMT_CONSTEXPR auto write(OutputIt out, basic_string_view<Char> s,
const format_specs& specs) -> OutputIt {
bool is_debug = specs.type() == presentation_type::debug;
bool do_u8replace = specs.type() == presentation_type::u8replace;
// Handle u8replace option
if (do_u8replace) {
std::string replaced;
fmt::utf8::replace_invalid(s, std::back_inserter(replaced));
s = basic_string_view<Char>(replaced.data(), replaced.size());
}
if (specs.precision < 0 && specs.width == 0) {
auto&& it = reserve(out, s.size());
return is_debug ? write_escaped_string(it, s) : copy<char>(s, it);