upd: [imp] use the Win API for character set conversion on Windows

This commit is contained in:
mutouyun 2022-10-30 14:55:15 +08:00
parent 7d39e5395a
commit 938ba67c7a
3 changed files with 136 additions and 24 deletions

View File

@ -8,13 +8,22 @@
#include "libimp/log.h"
#include "libimp/detect_plat.h"
#if defined(LIBIMP_OS_WIN)
#include "libimp/platform/win/codecvt.h"
#endif
LIBIMP_NAMESPACE_BEG_
/**
* @brief The transform between local-character-set(UTF-8/GBK/...) and UTF-16/32
* @brief The transform between local-character-set(UTF-8/GBK/...) and UTF-16/32.
*
* Modified from UnicodeConverter
* Copyright (c) 2010. Jianhui Qin (http://blog.csdn.net/jhqin)
* Modified from UnicodeConverter.
* Copyright (c) 2010. Jianhui Qin (http://blog.csdn.net/jhqin).
*
* @remarks codecvt_utf8_utf16/std::wstring_convert is deprecated.
* @see https://codingtidbit.com/2020/02/09/c17-codecvt_utf8-is-deprecated/
* https://stackoverflow.com/questions/42946335/deprecated-header-codecvt-replacement
* https://en.cppreference.com/w/cpp/locale/codecvt/in
*/
namespace {
@ -323,33 +332,37 @@ auto cvt_cstr_utf(T const *src, std::size_t slen, U *des, std::size_t dlen) noex
// #define LIBIMP_DEF_CVT_CSTR_($char_t, $char_u)
// Each line below invokes LIBIMP_DEF_CVT_CSTR_ for one (source, destination)
// character-type pair; the macro body itself is outside this view.
// The `#if !defined(LIBIMP_OS_WIN)` groups hold the pairs that convert between
// wchar_t and a different character type, so they are skipped on Windows.
// NOTE(review): on Windows the char<->wchar_t and char8_t<->wchar_t pairs appear
// to come from libimp/platform/win/codecvt.h instead — confirm.
// NOTE(review): this listing shows removed and added lines of the change
// together, so some pairs and one #endif appear twice — confirm the final set.
LIBIMP_DEF_CVT_CSTR_(char , char)
LIBIMP_DEF_CVT_CSTR_(char , wchar_t)
LIBIMP_DEF_CVT_CSTR_(char , char16_t)
LIBIMP_DEF_CVT_CSTR_(char , char32_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , wchar_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char16_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char32_t)
LIBIMP_DEF_CVT_CSTR_(char16_t, char16_t)
LIBIMP_DEF_CVT_CSTR_(char16_t, char)
LIBIMP_DEF_CVT_CSTR_(char16_t, wchar_t)
LIBIMP_DEF_CVT_CSTR_(char16_t, char32_t)
LIBIMP_DEF_CVT_CSTR_(char32_t, char32_t)
LIBIMP_DEF_CVT_CSTR_(char32_t, char)
LIBIMP_DEF_CVT_CSTR_(char32_t, wchar_t)
LIBIMP_DEF_CVT_CSTR_(char32_t, char16_t)
// Conversions between wchar_t and another character type (non-Windows only).
#if !defined(LIBIMP_OS_WIN)
LIBIMP_DEF_CVT_CSTR_(char , wchar_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char16_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char32_t)
LIBIMP_DEF_CVT_CSTR_(char16_t, wchar_t)
LIBIMP_DEF_CVT_CSTR_(char32_t, wchar_t)
#endif // !defined(LIBIMP_OS_WIN)
// char8_t pairs are only listed when LIBIMP_CPP_20 is defined.
#if defined(LIBIMP_CPP_20)
LIBIMP_DEF_CVT_CSTR_(char8_t , char8_t)
LIBIMP_DEF_CVT_CSTR_(char8_t , char)
LIBIMP_DEF_CVT_CSTR_(char8_t , wchar_t)
LIBIMP_DEF_CVT_CSTR_(char8_t , char16_t)
LIBIMP_DEF_CVT_CSTR_(char8_t , char32_t)
LIBIMP_DEF_CVT_CSTR_(char , char8_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char8_t)
LIBIMP_DEF_CVT_CSTR_(char16_t, char8_t)
LIBIMP_DEF_CVT_CSTR_(char32_t, char8_t)
#endif
// char8_t <-> wchar_t (non-Windows only).
#if !defined(LIBIMP_OS_WIN)
LIBIMP_DEF_CVT_CSTR_(char8_t , wchar_t)
LIBIMP_DEF_CVT_CSTR_(wchar_t , char8_t)
#endif // !defined(LIBIMP_OS_WIN)
#endif // defined(LIBIMP_CPP_20)
// The helper macro is local to this list; drop it once the pairs are emitted.
#undef LIBIMP_DEF_CVT_CSTR_

View File

@ -0,0 +1,97 @@
/**
* @file libimp/platform/win/codecvt.h
* @author mutouyun (orz@orzz.org)
*/
#pragma once
#include <Windows.h>
#include "libimp/codecvt.h"
#include "libimp/log.h"
#include "libimp/system.h"
#include "libimp/detect_plat.h"
LIBIMP_NAMESPACE_BEG_
/**
 * @brief Converts a string in the system default ANSI code page (CP_ACP)
 *        to wide characters by calling MultiByteToWideChar.
 *
 * @param src  Source bytes; the length is given explicitly by `slen`.
 * @param slen Number of bytes in `src`.
 * @param des  Destination buffer, or nullptr to only query the required size.
 * @param dlen Capacity of `des` in wchar_t units; ignored when `des` is nullptr.
 * @return The count reported by MultiByteToWideChar (characters written, or
 *         characters required when no buffer is supplied). Returns 0 when the
 *         source is empty, when a length does not fit in `int`, or when the
 *         API call fails.
 *
 * @see https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar
 *      https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte
 *
 * CP_ACP       : The system default Windows ANSI code page.
 * CP_MACCP     : The current system Macintosh code page.
 * CP_OEMCP     : The current system OEM code page.
 * CP_SYMBOL    : Symbol code page (42).
 * CP_THREAD_ACP: The Windows ANSI code page for the current thread.
 * CP_UTF7      : UTF-7. Use this value only when forced by a 7-bit transport mechanism. Use of UTF-8 is preferred.
 * CP_UTF8      : UTF-8.
 */
template <>
std::size_t cvt_cstr(char const *src, std::size_t slen, wchar_t *des, std::size_t dlen) noexcept {
  LIBIMP_LOG_();
  // Check the length before dereferencing: a zero-length range must not be read.
  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
    log.error("source string is empty.");
    return 0;
  }
  // The Win32 API takes `int` lengths. Narrow once and verify the value
  // round-trips, so an oversized length is rejected instead of being
  // truncated or turned into a negative count (-1 means "null-terminated").
  int const cb_src = static_cast<int>(slen);
  int const cch_wc = (des == nullptr) ? 0 : static_cast<int>(dlen);
  bool const src_fits = (cb_src > 0) && (static_cast<std::size_t>(cb_src) == slen);
  bool const des_fits = (des == nullptr) ||
                        ((cch_wc >= 0) && (static_cast<std::size_t>(cch_wc) == dlen));
  if (!src_fits || !des_fits) {
    log.error("string length is out of the range of int.");
    return 0;
  }
  int const size_needed = ::MultiByteToWideChar(CP_ACP, 0, src, cb_src, des, cch_wc);
  if (size_needed <= 0) {
    log.error("MultiByteToWideChar fails. error = {}", sys::error_code());
    return 0;
  }
  return static_cast<std::size_t>(size_needed);
}
/**
 * @brief Converts a wide-character string to the system default ANSI code
 *        page (CP_ACP) by calling WideCharToMultiByte.
 *
 * @param src  Source wide characters; the length is given explicitly by `slen`.
 * @param slen Number of wchar_t units in `src`.
 * @param des  Destination buffer, or nullptr to only query the required size.
 * @param dlen Capacity of `des` in bytes; ignored when `des` is nullptr.
 * @return The count reported by WideCharToMultiByte (bytes written, or bytes
 *         required when no buffer is supplied). Returns 0 when the source is
 *         empty, when a length does not fit in `int`, or when the API call fails.
 */
template <>
std::size_t cvt_cstr(wchar_t const *src, std::size_t slen, char *des, std::size_t dlen) noexcept {
  LIBIMP_LOG_();
  // Check the length before dereferencing: a zero-length range must not be read.
  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
    log.error("source string is empty.");
    return 0;
  }
  // The Win32 API takes `int` lengths. Narrow once and verify the value
  // round-trips, so an oversized length is rejected instead of being
  // truncated or turned into a negative count (-1 means "null-terminated").
  int const cch_src = static_cast<int>(slen);
  int const cb_mb   = (des == nullptr) ? 0 : static_cast<int>(dlen);
  bool const src_fits = (cch_src > 0) && (static_cast<std::size_t>(cch_src) == slen);
  bool const des_fits = (des == nullptr) ||
                        ((cb_mb >= 0) && (static_cast<std::size_t>(cb_mb) == dlen));
  if (!src_fits || !des_fits) {
    log.error("string length is out of the range of int.");
    return 0;
  }
  // No default character and no "used default char" flag are requested.
  int const size_needed = ::WideCharToMultiByte(CP_ACP, 0, src, cch_src, des, cb_mb, nullptr, nullptr);
  if (size_needed <= 0) {
    log.error("WideCharToMultiByte fails. error = {}", sys::error_code());
    return 0;
  }
  return static_cast<std::size_t>(size_needed);
}
/**
* @brief Used for char8_t (since C++20) to wchar_t conversion.
*
* There is no unit test to guarantee correctness (I'm a little lazy here),
* so if you find any bugs, please contact me promptly.
*/
#if defined(LIBIMP_CPP_20)
/**
 * @brief Converts a UTF-8 (char8_t) string to wide characters by calling
 *        MultiByteToWideChar with CP_UTF8.
 *
 * @param src  Source code units; the length is given explicitly by `slen`.
 * @param slen Number of char8_t units in `src`.
 * @param des  Destination buffer, or nullptr to only query the required size.
 * @param dlen Capacity of `des` in wchar_t units; ignored when `des` is nullptr.
 * @return The count reported by MultiByteToWideChar (characters written, or
 *         characters required when no buffer is supplied). Returns 0 when the
 *         source is empty, when a length does not fit in `int`, or when the
 *         API call fails.
 */
template <>
std::size_t cvt_cstr(char8_t const *src, std::size_t slen, wchar_t *des, std::size_t dlen) noexcept {
  LIBIMP_LOG_();
  // Check the length before dereferencing: a zero-length range must not be read.
  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
    log.error("source string is empty.");
    return 0;
  }
  // The Win32 API takes `int` lengths. Narrow once and verify the value
  // round-trips, so an oversized length is rejected instead of being
  // truncated or turned into a negative count (-1 means "null-terminated").
  int const cb_src = static_cast<int>(slen);
  int const cch_wc = (des == nullptr) ? 0 : static_cast<int>(dlen);
  bool const src_fits = (cb_src > 0) && (static_cast<std::size_t>(cb_src) == slen);
  bool const des_fits = (des == nullptr) ||
                        ((cch_wc >= 0) && (static_cast<std::size_t>(cch_wc) == dlen));
  if (!src_fits || !des_fits) {
    log.error("string length is out of the range of int.");
    return 0;
  }
  // The API expects `char const *`; reinterpret the bytes without dropping const.
  int const size_needed = ::MultiByteToWideChar(CP_UTF8, 0, reinterpret_cast<char const *>(src),
                                                cb_src, des, cch_wc);
  if (size_needed <= 0) {
    log.error("MultiByteToWideChar fails. error = {}", sys::error_code());
    return 0;
  }
  return static_cast<std::size_t>(size_needed);
}
/**
 * @brief Converts a wide-character string to UTF-8 (char8_t) by calling
 *        WideCharToMultiByte with CP_UTF8.
 *
 * @param src  Source wide characters; the length is given explicitly by `slen`.
 * @param slen Number of wchar_t units in `src`.
 * @param des  Destination buffer, or nullptr to only query the required size.
 * @param dlen Capacity of `des` in char8_t units; ignored when `des` is nullptr.
 * @return The count reported by WideCharToMultiByte (bytes written, or bytes
 *         required when no buffer is supplied). Returns 0 when the source is
 *         empty, when a length does not fit in `int`, or when the API call fails.
 */
template <>
std::size_t cvt_cstr(wchar_t const *src, std::size_t slen, char8_t *des, std::size_t dlen) noexcept {
  LIBIMP_LOG_();
  // Check the length before dereferencing: a zero-length range must not be read.
  if ((src == nullptr) || (slen == 0) || ((*src) == 0)) {
    log.error("source string is empty.");
    return 0;
  }
  // The Win32 API takes `int` lengths. Narrow once and verify the value
  // round-trips, so an oversized length is rejected instead of being
  // truncated or turned into a negative count (-1 means "null-terminated").
  int const cch_src = static_cast<int>(slen);
  int const cb_mb   = (des == nullptr) ? 0 : static_cast<int>(dlen);
  bool const src_fits = (cch_src > 0) && (static_cast<std::size_t>(cch_src) == slen);
  bool const des_fits = (des == nullptr) ||
                        ((cb_mb >= 0) && (static_cast<std::size_t>(cb_mb) == dlen));
  if (!src_fits || !des_fits) {
    log.error("string length is out of the range of int.");
    return 0;
  }
  // The API writes through `char *`; char8_t output is byte-compatible.
  // For CP_UTF8 the last two arguments must be null.
  int const size_needed = ::WideCharToMultiByte(CP_UTF8, 0, src, cch_src,
                                                reinterpret_cast<char *>(des), cb_mb,
                                                nullptr, nullptr);
  if (size_needed <= 0) {
    log.error("WideCharToMultiByte fails. error = {}", sys::error_code());
    return 0;
  }
  return static_cast<std::size_t>(size_needed);
}
#endif // defined(LIBIMP_CPP_20)
LIBIMP_NAMESPACE_END_

View File

@ -5,39 +5,41 @@
#include "gtest/gtest.h"
#include "libimp/codecvt.h"
#include "libimp/countof.h"
// Exercises imp::cvt_cstr. Each scope first queries the converted length with
// a null destination, then converts into a buffer of that size and compares
// the result with the expected text.
// NOTE(review): this listing shows the lines before and after the change side
// by side (e.g. both the wchar_t-based and the char16_t-based declaration of
// `utf16`) — confirm which set is current before compiling.
TEST(codecvt, cvt_cstr) {
// "hello world, " followed by CJK text, spelled out as UTF-8 bytes.
char const *utf8 = "hello world, "
"\xe4\xbd\xa0\xe5\xa5\xbd\xef\xbc"
"\x8c\xe3\x81\x93\xe3\x82\x93\xe3"
"\x81\xab\xe3\x81\xa1\xe3\x81\xaf";
// The same text as UTF-16 code units.
wchar_t const *utf16 = L"hello world, \u4f60\u597d\uff0c\u3053\u3093\u306b\u3061\u306f";
char16_t const utf16[] = u"hello world, "
"\u4f60\u597d\uff0c\u3053\u3093\u306b\u3061\u306f";
// utf8 -> utf16
{
auto cvt_len = imp::cvt_cstr(utf8, std::strlen(utf8), (wchar_t *)nullptr, 0);
auto cvt_len = imp::cvt_cstr(utf8, std::strlen(utf8), (char16_t *)nullptr, 0);
EXPECT_NE(cvt_len, 0);
std::wstring wstr(cvt_len, L'\0');
// NOTE(review): L'\0' is a wchar_t literal while the string is std::u16string;
// the last scope uses u'\0' — consider matching.
std::u16string wstr(cvt_len, L'\0');
EXPECT_EQ(imp::cvt_cstr(utf8, std::strlen(utf8), &wstr[0], wstr.size()), cvt_len);
EXPECT_STREQ(wstr.c_str(), utf16);
EXPECT_EQ(wstr, utf16);
}
// utf16 -> utf8
{
auto cvt_len = imp::cvt_cstr(utf16, std::wcslen(utf16), (char *)nullptr, 0);
// countof(utf16) - 1 excludes the terminating null of the array.
auto cvt_len = imp::cvt_cstr(utf16, imp::countof(utf16) - 1, (char *)nullptr, 0);
EXPECT_NE(cvt_len, 0);
std::string str(cvt_len, '\0');
EXPECT_EQ(imp::cvt_cstr(utf16, std::wcslen(utf16), &str[0], str.size()), cvt_len);
EXPECT_STREQ(str.c_str(), utf8);
EXPECT_EQ(imp::cvt_cstr(utf16, imp::countof(utf16) - 1, &str[0], str.size()), cvt_len);
EXPECT_EQ(str, utf8);
}
// utf8 -> utf8 (same character type): the length is expected to be unchanged.
{
auto cvt_len = imp::cvt_cstr(utf8, std::strlen(utf8), (char *)nullptr, 0);
EXPECT_EQ(cvt_len, std::strlen(utf8));
std::string str(cvt_len, '\0');
EXPECT_EQ(imp::cvt_cstr(utf8, cvt_len, &str[0], str.size()), cvt_len);
EXPECT_STREQ(str.c_str(), utf8);
EXPECT_EQ(str, utf8);
}
// utf16 -> utf16 (same character type): the length is expected to be unchanged.
{
auto cvt_len = imp::cvt_cstr(utf16, std::wcslen(utf16), (wchar_t *)nullptr, 0);
EXPECT_EQ(cvt_len, std::wcslen(utf16));
std::wstring wstr(cvt_len, L'\0');
auto cvt_len = imp::cvt_cstr(utf16, imp::countof(utf16) - 1, (char16_t *)nullptr, 0);
EXPECT_EQ(cvt_len, imp::countof(utf16) - 1);
std::u16string wstr(cvt_len, u'\0');
EXPECT_EQ(imp::cvt_cstr(utf16, cvt_len, &wstr[0], wstr.size()), cvt_len);
EXPECT_STREQ(wstr.c_str(), utf16);
EXPECT_EQ(wstr, utf16);
}
}