From 1a9165f7fce2fe325a0381ef056eb00f8d3014f1 Mon Sep 17 00:00:00 2001
From: Jason Turner
Date: Fri, 2 Mar 2018 07:45:24 -0700
Subject: [PATCH] Normalize on C++'s standards for \u and \U

---
 include/chaiscript/language/chaiscript_parser.hpp | 14 +++++++++++++-
 unittests/string_unicode_unicode.chai             |  6 +++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp
index 2c957ca8..1cac8d95 100644
--- a/include/chaiscript/language/chaiscript_parser.hpp
+++ b/include/chaiscript/language/chaiscript_parser.hpp
@@ -1064,6 +1064,13 @@ namespace chaiscript
           }
         }
+        void finalize_unicode()
+        {
+          if (unicode_size > 0) {
+            process_unicode();
+          }
+        }
         void process_hex()
         {
           auto val = stoll(hex_matches, nullptr, 16);
@@ -1087,17 +1094,20 @@ namespace chaiscript
         void process_unicode()
         {
           const auto ch = static_cast(std::stoi(hex_matches, nullptr, 16));
+          const auto match_size = hex_matches.size();
           hex_matches.clear();
           is_escaped = false;
           const auto u_size = unicode_size;
           unicode_size = 0;
           char buf[4];
+          if (u_size != match_size) {
+            throw exception::eval_error("Incomplete unicode escape sequence");
+          }
           if (u_size == 4 && ch >= 0xD800 && ch <= 0xDFFF) {
             throw exception::eval_error("Invalid 16 bit universal character");
           }
-          unicode_size = 0;
           if (ch < 0x80) {
             match += static_cast(ch);
@@ -1289,6 +1299,7 @@ namespace chaiscript
               }
             }
+            cparser.finalize_unicode();
             return cparser.is_interpolated;
           }();
@@ -1347,6 +1358,7 @@ namespace chaiscript
             for (auto s = start + 1, end = m_position - 1; s != end; ++s) {
               cparser.parse(*s, start.line, start.col, *m_filename);
             }
+            cparser.finalize_unicode();
           }
           if (match.size() != 1) {
diff --git a/unittests/string_unicode_unicode.chai b/unittests/string_unicode_unicode.chai
index 267fbb64..d79dcaf3 100644
--- a/unittests/string_unicode_unicode.chai
+++ b/unittests/string_unicode_unicode.chai
@@ -2,15 +2,15 @@ assert_equal("\uc39c", "쎜")
 assert_equal("U for \u00dcmlauts", "U for Ümlauts")
 assert_equal("Thorn \u00fe sign", "Thorn þ sign")
-assert_equal("Test\u20Me", "Test Me")
+assert_equal("Test\u0020Me", "Test Me")
 assert_equal("Test\u2022Me", "Test•Me")
-//assert_equal("Test\uDD34\uD83DMe", "Test🔴Me")
 assert_equal("\xF0\x9F\x8D\x8C", "🍌")
 assert_equal("\U0001F34C", "🍌")
 assert_throws("Invalid 16 bit universal character", fun(){ parse("\"\\uD83C\""); });
+assert_throws("Incomplete unicode escape sequence", fun(){ parse("\"\\uD83\""); });
-assert_equal("\U24B62", "𤭢")
+assert_equal("\U00024B62", "𤭢")
 assert_equal("Test\U0001F534Me", "Test🔴Me")