mirror of
https://github.com/ChaiScript/ChaiScript.git
synced 2026-01-01 03:12:23 +08:00
Fix handling of 32 bit unicode character escapes
This commit is contained in:
parent
81ebe1a7be
commit
1b9027a24f
@ -1088,7 +1088,17 @@ namespace chaiscript
|
||||
{
|
||||
const auto ch = static_cast<uint32_t>(std::stoi(hex_matches, nullptr, 16));
|
||||
hex_matches.clear();
|
||||
is_escaped = false;
|
||||
const auto u_size = unicode_size;
|
||||
unicode_size = 0;
|
||||
|
||||
char buf[4];
|
||||
if (u_size == 4 && ch >= 0xD800 && ch <= 0xDFFF) {
|
||||
throw exception::eval_error("Invalid 16 bit universal character");
|
||||
}
|
||||
|
||||
unicode_size = 0;
|
||||
|
||||
if (ch < 0x80) {
|
||||
match += static_cast<char>(ch);
|
||||
} else if (ch < 0x800) {
|
||||
@ -1108,10 +1118,8 @@ namespace chaiscript
|
||||
match.append(buf, 4);
|
||||
} else {
|
||||
// this must be an invalid escape sequence?
|
||||
throw exception::eval_error("Unknown 32 bit unicode literal sequence");
|
||||
throw exception::eval_error("Invalid 32 bit universal character");
|
||||
}
|
||||
is_escaped = false;
|
||||
unicode_size = 0;
|
||||
}
|
||||
|
||||
void parse(const char_type t_char, const int line, const int col, const std::string &filename) {
|
||||
@ -1157,7 +1165,6 @@ namespace chaiscript
|
||||
process_unicode();
|
||||
}
|
||||
return;
|
||||
|
||||
} else {
|
||||
// Not a unicode escape anymore, try parsing anyway
|
||||
// Maybe someone used 'slash'uAA only
|
||||
@ -1182,7 +1189,7 @@ namespace chaiscript
|
||||
} else if (t_char == 'u') {
|
||||
unicode_size = 4;
|
||||
} else if (t_char == 'U') {
|
||||
unicode_size = 6;
|
||||
unicode_size = 8;
|
||||
} else {
|
||||
switch (t_char) {
|
||||
case ('\'') : match.push_back('\''); break;
|
||||
|
||||
@ -4,4 +4,13 @@ assert_equal("U for \u00dcmlauts", "U for Ümlauts")
|
||||
assert_equal("Thorn \u00fe sign", "Thorn þ sign")
|
||||
assert_equal("Test\u20Me", "Test Me")
|
||||
assert_equal("Test\u2022Me", "Test•Me")
|
||||
assert_equal("Test\U1F534Me", "Test🔴Me")
|
||||
//assert_equal("Test\uDD34\uD83DMe", "Test🔴Me")
|
||||
|
||||
assert_equal("\xF0\x9F\x8D\x8C", "🍌")
|
||||
assert_equal("\U0001F34C", "🍌")
|
||||
|
||||
assert_throws("Invalid 16 bit universal character", fun(){ parse("\"\\uD83C\""); });
|
||||
|
||||
assert_equal("\U24B62", "𤭢")
|
||||
|
||||
assert_equal("Test\U0001F534Me", "Test🔴Me")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user