From 1acfb4f7b85f92be2e678595925fe197ec7868c0 Mon Sep 17 00:00:00 2001 From: Jason Turner Date: Thu, 1 Mar 2018 11:22:20 -0700 Subject: [PATCH] Apply patch from @chris0e3 --- .../chaiscript/language/chaiscript_parser.hpp | 37 ++++++++++++++++--- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 163d2bb3..3c0be095 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -1036,7 +1036,7 @@ namespace chaiscript bool saw_interpolation_marker = false; bool is_octal = false; bool is_hex = false; - bool is_unicode = false; + char is_unicode = 0; const bool interpolation_allowed; string_type octal_matches; @@ -1086,11 +1086,34 @@ namespace chaiscript void process_unicode() { - auto val = stoll(hex_matches, nullptr, 16); + unsigned ch = std::stoi(hex_matches, nullptr, 16); hex_matches.clear(); - match += detail::Char_Parser_Helper::str_from_ll(val); + char buf[4]; + if (ch < 0x80) + match += static_cast<char>(ch); + else if (ch < 0x800) + { + buf[0] = 0xC0 | (ch >> 6); + buf[1] = 0x80 | (ch & 0x3F); + match.append(buf, 2); + } + else if (ch < 0x10000) + { + buf[0] = 0xE0 | (ch >> 12); + buf[1] = 0x80 | ((ch >> 6) & 0x3F); + buf[2] = 0x80 | (ch & 0x3F); + match.append(buf, 3); + } + else //if (ch < 0x200000) + { + buf[0] = 0xF0 | (ch >> 18); + buf[1] = 0x80 | ((ch >> 12) & 0x3F); + buf[2] = 0x80 | ((ch >> 6) & 0x3F); + buf[3] = 0x80 | (ch & 0x3F); + match.append(buf, 4); + } is_escaped = false; - is_unicode = false; + is_unicode = 0; } void parse(const char_type t_char, const int line, const int col, const std::string &filename) { @@ -1130,7 +1153,7 @@ namespace chaiscript if (is_hex_char) { hex_matches.push_back(t_char); - if(hex_matches.size() == 4) { + if(hex_matches.size() == is_unicode) { // Format is specified to be 'slash'uABCD // on collecting from A to D do parsing process_unicode(); @@ 
-1158,7 +1181,9 @@ namespace chaiscript } else if (t_char == 'x') { is_hex = true; } else if (t_char == 'u') { - is_unicode = true; + is_unicode = 4; + } else if (t_char == 'U') { + is_unicode = 6; } else { switch (t_char) { case ('\'') : match.push_back('\''); break;