Normalize on C++'s standards for \u and \U

This commit is contained in:
Jason Turner 2018-03-02 07:45:24 -07:00
parent 1b9027a24f
commit 1a9165f7fc
2 changed files with 16 additions and 4 deletions

View File

@ -1064,6 +1064,13 @@ namespace chaiscript
}
}
/// Flush a unicode escape that is still being collected.
///
/// If unicode_size is still positive, hand the digits gathered so far to
/// process_unicode(); otherwise this is a no-op.
void finalize_unicode()
{
  const bool escape_pending = unicode_size > 0;
  if (!escape_pending) {
    return;
  }
  process_unicode();
}
void process_hex()
{
auto val = stoll(hex_matches, nullptr, 16);
@ -1087,17 +1094,20 @@ namespace chaiscript
void process_unicode()
{
const auto ch = static_cast<uint32_t>(std::stoi(hex_matches, nullptr, 16));
const auto match_size = hex_matches.size();
hex_matches.clear();
is_escaped = false;
const auto u_size = unicode_size;
unicode_size = 0;
char buf[4];
if (u_size != match_size) {
throw exception::eval_error("Incomplete unicode escape sequence");
}
if (u_size == 4 && ch >= 0xD800 && ch <= 0xDFFF) {
throw exception::eval_error("Invalid 16 bit universal character");
}
unicode_size = 0;
if (ch < 0x80) {
match += static_cast<char>(ch);
@ -1289,6 +1299,7 @@ namespace chaiscript
}
}
cparser.finalize_unicode();
return cparser.is_interpolated;
}();
@ -1347,6 +1358,7 @@ namespace chaiscript
for (auto s = start + 1, end = m_position - 1; s != end; ++s) {
cparser.parse(*s, start.line, start.col, *m_filename);
}
cparser.finalize_unicode();
}
if (match.size() != 1) {

View File

@ -2,15 +2,15 @@ assert_equal("\uc39c", "쎜")
assert_equal("U for \u00dcmlauts", "U for Ümlauts")
assert_equal("Thorn \u00fe sign", "Thorn þ sign")
assert_equal("Test\u20Me", "Test Me")
assert_equal("Test\u0020Me", "Test Me")
assert_equal("Test\u2022Me", "Test•Me")
//assert_equal("Test\uDD34\uD83DMe", "Test🔴Me")
assert_equal("\xF0\x9F\x8D\x8C", "🍌")
assert_equal("\U0001F34C", "🍌")
assert_throws("Invalid 16 bit universal character", fun(){ parse("\"\\uD83C\""); });
assert_throws("Incomplete unicode escape sequence", fun(){ parse("\"\\uD83\""); });
assert_equal("\U24B62", "𤭢")
assert_equal("\U00024B62", "𤭢")
assert_equal("Test\U0001F534Me", "Test🔴Me")