From 938ba67c7a1551dd29b9db902f64b98db93f959b Mon Sep 17 00:00:00 2001 From: mutouyun Date: Sun, 30 Oct 2022 14:55:15 +0800 Subject: [PATCH] upd: [imp] using the win api corresponds to the character set conversion in win --- src/libimp/codecvt.cpp | 37 ++++++++---- src/libimp/platform/win/codecvt.h | 97 +++++++++++++++++++++++++++++++ test/test_imp_codecvt.cpp | 26 +++++---- 3 files changed, 136 insertions(+), 24 deletions(-) create mode 100644 src/libimp/platform/win/codecvt.h diff --git a/src/libimp/codecvt.cpp b/src/libimp/codecvt.cpp index 57c6e0c..417b704 100644 --- a/src/libimp/codecvt.cpp +++ b/src/libimp/codecvt.cpp @@ -8,13 +8,22 @@ #include "libimp/log.h" #include "libimp/detect_plat.h" +#if defined(LIBIMP_OS_WIN) +#include "libimp/platform/win/codecvt.h" +#endif + LIBIMP_NAMESPACE_BEG_ /** - * @brief The transform between local-character-set(UTF-8/GBK/...) and UTF-16/32 + * @brief The transform between local-character-set(UTF-8/GBK/...) and UTF-16/32. * - * Modified from UnicodeConverter - * Copyright (c) 2010. Jianhui Qin (http://blog.csdn.net/jhqin) + * Modified from UnicodeConverter. + * Copyright (c) 2010. Jianhui Qin (http://blog.csdn.net/jhqin). + * + * @remarks codecvt_utf8_utf16/std::wstring_convert is deprecated. 
+ * @see https://codingtidbit.com/2020/02/09/c17-codecvt_utf8-is-deprecated/ + * https://stackoverflow.com/questions/42946335/deprecated-header-codecvt-replacement + * https://en.cppreference.com/w/cpp/locale/codecvt/in */ namespace { @@ -323,33 +332,37 @@ auto cvt_cstr_utf(T const *src, std::size_t slen, U *des, std::size_t dlen) noex // #define LIBIMP_DEF_CVT_CSTR_($char_t, $char_u) LIBIMP_DEF_CVT_CSTR_(char , char) -LIBIMP_DEF_CVT_CSTR_(char , wchar_t) LIBIMP_DEF_CVT_CSTR_(char , char16_t) LIBIMP_DEF_CVT_CSTR_(char , char32_t) LIBIMP_DEF_CVT_CSTR_(wchar_t , wchar_t) -LIBIMP_DEF_CVT_CSTR_(wchar_t , char) -LIBIMP_DEF_CVT_CSTR_(wchar_t , char16_t) -LIBIMP_DEF_CVT_CSTR_(wchar_t , char32_t) LIBIMP_DEF_CVT_CSTR_(char16_t, char16_t) LIBIMP_DEF_CVT_CSTR_(char16_t, char) -LIBIMP_DEF_CVT_CSTR_(char16_t, wchar_t) LIBIMP_DEF_CVT_CSTR_(char16_t, char32_t) LIBIMP_DEF_CVT_CSTR_(char32_t, char32_t) LIBIMP_DEF_CVT_CSTR_(char32_t, char) -LIBIMP_DEF_CVT_CSTR_(char32_t, wchar_t) LIBIMP_DEF_CVT_CSTR_(char32_t, char16_t) +#if !defined(LIBIMP_OS_WIN) +LIBIMP_DEF_CVT_CSTR_(char , wchar_t) +LIBIMP_DEF_CVT_CSTR_(wchar_t , char) +LIBIMP_DEF_CVT_CSTR_(wchar_t , char16_t) +LIBIMP_DEF_CVT_CSTR_(wchar_t , char32_t) +LIBIMP_DEF_CVT_CSTR_(char16_t, wchar_t) +LIBIMP_DEF_CVT_CSTR_(char32_t, wchar_t) +#endif // !defined(LIBIMP_OS_WIN) #if defined(LIBIMP_CPP_20) LIBIMP_DEF_CVT_CSTR_(char8_t , char8_t) LIBIMP_DEF_CVT_CSTR_(char8_t , char) -LIBIMP_DEF_CVT_CSTR_(char8_t , wchar_t) LIBIMP_DEF_CVT_CSTR_(char8_t , char16_t) LIBIMP_DEF_CVT_CSTR_(char8_t , char32_t) LIBIMP_DEF_CVT_CSTR_(char , char8_t) -LIBIMP_DEF_CVT_CSTR_(wchar_t , char8_t) LIBIMP_DEF_CVT_CSTR_(char16_t, char8_t) LIBIMP_DEF_CVT_CSTR_(char32_t, char8_t) -#endif +#if !defined(LIBIMP_OS_WIN) +LIBIMP_DEF_CVT_CSTR_(char8_t , wchar_t) +LIBIMP_DEF_CVT_CSTR_(wchar_t , char8_t) +#endif // !defined(LIBIMP_OS_WIN) +#endif // defined(LIBIMP_CPP_20) #undef LIBIMP_DEF_CVT_CSTR_ diff --git a/src/libimp/platform/win/codecvt.h 
b/src/libimp/platform/win/codecvt.h
new file mode 100644
index 0000000..8d865a7
--- /dev/null
+++ b/src/libimp/platform/win/codecvt.h
@@ -0,0 +1,158 @@
+/**
+ * @file libimp/platform/win/codecvt.h
+ * @author mutouyun (orz@orzz.org)
+ */
+#pragma once
+
+#include <climits>
+
+#include <Windows.h>
+
+#include "libimp/codecvt.h"
+#include "libimp/log.h"
+#include "libimp/system.h"
+#include "libimp/detect_plat.h"
+
+LIBIMP_NAMESPACE_BEG_
+
+/**
+ * @brief Windows specializations of cvt_cstr for the conversions that involve wchar_t.
+ *
+ * All specializations share one contract:
+ *  - An empty source (null src, slen == 0 or a leading NUL) is logged and yields 0.
+ *  - A null des makes the call report the required length without writing anything.
+ *  - On failure the Win32 error is logged and 0 is returned.
+ *
+ * @see https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar
+ *      https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte
+ *
+ * CP_ACP       : The system default Windows ANSI code page.
+ * CP_MACCP     : The current system Macintosh code page.
+ * CP_OEMCP     : The current system OEM code page.
+ * CP_SYMBOL    : Symbol code page (42).
+ * CP_THREAD_ACP: The Windows ANSI code page for the current thread.
+ * CP_UTF7      : UTF-7. Use this value only when forced by a 7-bit transport mechanism. Use of UTF-8 is preferred.
+ * CP_UTF8      : UTF-8.
+ */
+
+/** @brief Converts text in the system ANSI code page (CP_ACP) to wchar_t. */
+template <>
+std::size_t cvt_cstr(char const *src, std::size_t slen, wchar_t *des, std::size_t dlen) noexcept {
+  LIBIMP_LOG_();
+  // Test slen before (*src) so that a zero-length source is never dereferenced.
+  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
+    log.error("source string is empty.");
+    return 0;
+  }
+  // The API takes int lengths; a wrapped slen could become -1, which it reads as
+  // "null-terminated input", so reject oversized sources instead of truncating.
+  if (slen > static_cast<std::size_t>(INT_MAX)) {
+    log.error("source string is too long.");
+    return 0;
+  }
+  // A capacity of 0 asks the API for the required size only.
+  int cch_wc = 0;
+  if (des != nullptr) {
+    cch_wc = (dlen > static_cast<std::size_t>(INT_MAX)) ? INT_MAX : static_cast<int>(dlen);
+  }
+  int size_needed = ::MultiByteToWideChar(CP_ACP, 0, src, static_cast<int>(slen), des, cch_wc);
+  if (size_needed <= 0) {
+    log.error("MultiByteToWideChar fails. error = {}", sys::error_code());
+    return 0;
+  }
+  return static_cast<std::size_t>(size_needed);
+}
+
+/** @brief Converts wchar_t text to the system ANSI code page (CP_ACP). */
+template <>
+std::size_t cvt_cstr(wchar_t const *src, std::size_t slen, char *des, std::size_t dlen) noexcept {
+  LIBIMP_LOG_();
+  // Test slen before (*src) so that a zero-length source is never dereferenced.
+  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
+    log.error("source string is empty.");
+    return 0;
+  }
+  // The API takes int lengths; a wrapped slen could become -1, which it reads as
+  // "null-terminated input", so reject oversized sources instead of truncating.
+  if (slen > static_cast<std::size_t>(INT_MAX)) {
+    log.error("source string is too long.");
+    return 0;
+  }
+  // A capacity of 0 asks the API for the required size only.
+  int cb_mb = 0;
+  if (des != nullptr) {
+    cb_mb = (dlen > static_cast<std::size_t>(INT_MAX)) ? INT_MAX : static_cast<int>(dlen);
+  }
+  int size_needed = ::WideCharToMultiByte(CP_ACP, 0, src, static_cast<int>(slen), des, cb_mb, nullptr, nullptr);
+  if (size_needed <= 0) {
+    log.error("WideCharToMultiByte fails. error = {}", sys::error_code());
+    return 0;
+  }
+  return static_cast<std::size_t>(size_needed);
+}
+
+/**
+ * @brief Used for char8_t (since C++20) to wchar_t conversion.
+ *
+ * There is no unit test guaranteeing the correctness of these conversions yet,
+ * so please report any bugs you find.
+ */
+#if defined(LIBIMP_CPP_20)
+/** @brief Converts UTF-8 text (char8_t, CP_UTF8) to wchar_t. */
+template <>
+std::size_t cvt_cstr(char8_t const *src, std::size_t slen, wchar_t *des, std::size_t dlen) noexcept {
+  LIBIMP_LOG_();
+  // Test slen before (*src) so that a zero-length source is never dereferenced.
+  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
+    log.error("source string is empty.");
+    return 0;
+  }
+  // The API takes int lengths; a wrapped slen could become -1, which it reads as
+  // "null-terminated input", so reject oversized sources instead of truncating.
+  if (slen > static_cast<std::size_t>(INT_MAX)) {
+    log.error("source string is too long.");
+    return 0;
+  }
+  // A capacity of 0 asks the API for the required size only.
+  int cch_wc = 0;
+  if (des != nullptr) {
+    cch_wc = (dlen > static_cast<std::size_t>(INT_MAX)) ? INT_MAX : static_cast<int>(dlen);
+  }
+  int size_needed = ::MultiByteToWideChar(CP_UTF8, 0, reinterpret_cast<char const *>(src), static_cast<int>(slen), des, cch_wc);
+  if (size_needed <= 0) {
+    log.error("MultiByteToWideChar fails. error = {}", sys::error_code());
+    return 0;
+  }
+  return static_cast<std::size_t>(size_needed);
+}
+
+/** @brief Converts wchar_t text to UTF-8 (char8_t, CP_UTF8). */
+template <>
+std::size_t cvt_cstr(wchar_t const *src, std::size_t slen, char8_t *des, std::size_t dlen) noexcept {
+  LIBIMP_LOG_();
+  // Test slen before (*src) so that a zero-length source is never dereferenced.
+  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
+    log.error("source string is empty.");
+    return 0;
+  }
+  // The API takes int lengths; a wrapped slen could become -1, which it reads as
+  // "null-terminated input", so reject oversized sources instead of truncating.
+  if (slen > static_cast<std::size_t>(INT_MAX)) {
+    log.error("source string is too long.");
+    return 0;
+  }
+  // A capacity of 0 asks the API for the required size only.
+  int cb_mb = 0;
+  if (des != nullptr) {
+    cb_mb = (dlen > static_cast<std::size_t>(INT_MAX)) ? INT_MAX : static_cast<int>(dlen);
+  }
+  int size_needed = ::WideCharToMultiByte(CP_UTF8, 0, src, static_cast<int>(slen), reinterpret_cast<char *>(des), cb_mb, nullptr, nullptr);
+  if (size_needed <= 0) {
+    log.error("WideCharToMultiByte fails. error = {}", sys::error_code());
+    return 0;
+  }
+  return static_cast<std::size_t>(size_needed);
+}
+#endif // defined(LIBIMP_CPP_20)
+
+LIBIMP_NAMESPACE_END_
diff --git a/test/test_imp_codecvt.cpp b/test/test_imp_codecvt.cpp
index 34d5b12..2e8c8e4 100644
--- a/test/test_imp_codecvt.cpp
+++ b/test/test_imp_codecvt.cpp
@@ -5,39 +5,41 @@
 #include "gtest/gtest.h"
 
 #include "libimp/codecvt.h"
+#include "libimp/countof.h"
 
 TEST(codecvt, cvt_cstr) {
   char const *utf8 = "hello world, "
                      "\xe4\xbd\xa0\xe5\xa5\xbd\xef\xbc"
                      "\x8c\xe3\x81\x93\xe3\x82\x93\xe3"
                      "\x81\xab\xe3\x81\xa1\xe3\x81\xaf";
-  wchar_t const *utf16 = L"hello world, \u4f60\u597d\uff0c\u3053\u3093\u306b\u3061\u306f";
+  char16_t const utf16[] = u"hello world, "
+                           "\u4f60\u597d\uff0c\u3053\u3093\u306b\u3061\u306f";
   {
-    auto cvt_len = imp::cvt_cstr(utf8, std::strlen(utf8), (wchar_t *)nullptr, 0);
+    auto cvt_len = imp::cvt_cstr(utf8, std::strlen(utf8), (char16_t *)nullptr, 0);
     EXPECT_NE(cvt_len, 0);
-    std::wstring wstr(cvt_len, L'\0');
+    std::u16string wstr(cvt_len, u'\0');
     EXPECT_EQ(imp::cvt_cstr(utf8, std::strlen(utf8), &wstr[0], wstr.size()), cvt_len);
-    EXPECT_STREQ(wstr.c_str(), utf16);
+    EXPECT_EQ(wstr, utf16);
   }
   {
-    auto cvt_len = imp::cvt_cstr(utf16, std::wcslen(utf16), (char *)nullptr, 0);
+    auto cvt_len = imp::cvt_cstr(utf16, imp::countof(utf16) - 1, (char *)nullptr, 0);
     EXPECT_NE(cvt_len, 0);
     std::string str(cvt_len, '\0');
-    EXPECT_EQ(imp::cvt_cstr(utf16, std::wcslen(utf16), &str[0], str.size()), cvt_len);
-    EXPECT_STREQ(str.c_str(), utf8);
+    EXPECT_EQ(imp::cvt_cstr(utf16, imp::countof(utf16) - 1, &str[0], str.size()), cvt_len);
+    EXPECT_EQ(str, utf8);
   }
   {
     auto cvt_len = imp::cvt_cstr(utf8, std::strlen(utf8), (char *)nullptr, 0);
     EXPECT_EQ(cvt_len, std::strlen(utf8));
     std::string str(cvt_len, '\0');
     EXPECT_EQ(imp::cvt_cstr(utf8, cvt_len, &str[0], str.size()), cvt_len);
-    EXPECT_STREQ(str.c_str(), utf8);
+    EXPECT_EQ(str, utf8);
   }
   {
-    auto cvt_len = imp::cvt_cstr(utf16, std::wcslen(utf16), (wchar_t *)nullptr, 0);
-    EXPECT_EQ(cvt_len, std::wcslen(utf16));
-    std::wstring wstr(cvt_len, L'\0');
+    auto cvt_len = imp::cvt_cstr(utf16, imp::countof(utf16) - 1, (char16_t *)nullptr, 0);
+    EXPECT_EQ(cvt_len, imp::countof(utf16) - 1);
+    std::u16string wstr(cvt_len, u'\0');
     EXPECT_EQ(imp::cvt_cstr(utf16, cvt_len, &wstr[0], wstr.size()), cvt_len);
-    EXPECT_STREQ(wstr.c_str(), utf16);
+    EXPECT_EQ(wstr, utf16);
   }
 }
\ No newline at end of file