Fix handling of 32-bit Unicode character escapes

This commit is contained in:
Jason Turner 2018-03-01 17:03:50 -07:00
parent 81ebe1a7be
commit 1b9027a24f
2 changed files with 22 additions and 6 deletions

View File

@ -1088,7 +1088,17 @@ namespace chaiscript
{
const auto ch = static_cast<uint32_t>(std::stoi(hex_matches, nullptr, 16));
hex_matches.clear();
is_escaped = false;
const auto u_size = unicode_size;
unicode_size = 0;
char buf[4];
if (u_size == 4 && ch >= 0xD800 && ch <= 0xDFFF) {
throw exception::eval_error("Invalid 16 bit universal character");
}
unicode_size = 0;
if (ch < 0x80) {
match += static_cast<char>(ch);
} else if (ch < 0x800) {
@ -1108,10 +1118,8 @@ namespace chaiscript
match.append(buf, 4);
} else {
// this must be an invalid escape sequence?
throw exception::eval_error("Unknown 32 bit unicode literal sequence");
throw exception::eval_error("Invalid 32 bit universal character");
}
is_escaped = false;
unicode_size = 0;
}
void parse(const char_type t_char, const int line, const int col, const std::string &filename) {
@ -1157,7 +1165,6 @@ namespace chaiscript
process_unicode();
}
return;
} else {
// Not a unicode anymore, try parsing any way
// May be someone used 'slash'uAA only
@ -1182,7 +1189,7 @@ namespace chaiscript
} else if (t_char == 'u') {
unicode_size = 4;
} else if (t_char == 'U') {
unicode_size = 6;
unicode_size = 8;
} else {
switch (t_char) {
case ('\'') : match.push_back('\''); break;

View File

@ -4,4 +4,13 @@ assert_equal("U for \u00dcmlauts", "U for Ümlauts")
assert_equal("Thorn \u00fe sign", "Thorn þ sign")
assert_equal("Test\u20Me", "Test Me")
assert_equal("Test\u2022Me", "Test•Me")
assert_equal("Test\U1F534Me", "Test🔴Me")
//assert_equal("Test\uDD34\uD83DMe", "Test🔴Me")
assert_equal("\xF0\x9F\x8D\x8C", "🍌")
assert_equal("\U0001F34C", "🍌")
assert_throws("Invalid 16 bit universal character", fun(){ parse("\"\\uD83C\""); });
assert_equal("\U24B62", "𤭢")
assert_equal("Test\U0001F534Me", "Test🔴Me")