Merge pull request #645 from leftibot/fix/issue-477-json-unicode-escape

Fix #477: Handle \u unicode escape in JSON parser
This commit is contained in:
Jason Turner 2026-04-10 21:58:07 -06:00 committed by GitHub
commit f1ab992d0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 38 additions and 2 deletions

View File

@ -460,17 +460,35 @@ namespace chaiscript::json {
val += '\t'; val += '\t';
break; break;
case 'u': { case 'u': {
val += "\\u"; std::string hex_matches;
for (size_t i = 1; i <= 4; ++i) { for (size_t i = 1; i <= 4; ++i) {
c = str.at(offset + i); c = str.at(offset + i);
if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) {
val += c; hex_matches += c;
} else { } else {
throw std::runtime_error( throw std::runtime_error(
std::string("JSON ERROR: String: Expected hex character in unicode escape, found '") + c + "'"); std::string("JSON ERROR: String: Expected hex character in unicode escape, found '") + c + "'");
} }
} }
offset += 4; offset += 4;
const auto ch = static_cast<uint32_t>(std::stoi(hex_matches, nullptr, 16));
if (ch < 0x80) {
val += static_cast<char>(ch);
} else if (ch < 0x800) {
val += static_cast<char>(0xC0 | (ch >> 6));
val += static_cast<char>(0x80 | (ch & 0x3F));
} else if (ch < 0x10000) {
val += static_cast<char>(0xE0 | (ch >> 12));
val += static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
val += static_cast<char>(0x80 | (ch & 0x3F));
} else if (ch < 0x200000) {
val += static_cast<char>(0xF0 | (ch >> 18));
val += static_cast<char>(0x80 | ((ch >> 12) & 0x3F));
val += static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
val += static_cast<char>(0x80 | (ch & 0x3F));
} else {
throw std::runtime_error(std::string("JSON ERROR: String: Invalid 32 bit universal character"));
}
} break; } break;
default: default:
val += '\\'; val += '\\';

View File

@ -0,0 +1,2 @@
// \u escape in the ASCII range: U+0041 must decode to the single character 'A'
var ascii_char = from_json("\"\\u0041\"")
assert_equal(ascii_char, "A")

View File

@ -0,0 +1,3 @@
// \u escape in the 2-byte UTF-8 range, written with lowercase hex digits (U+00C4 = 'Ä')
// Reproduces the example reported in issue #477
var two_byte_lower = from_json("\"\\u00c4\"")
assert_equal(two_byte_lower, "\u00C4")

View File

@ -0,0 +1,2 @@
// \u escape in the 3-byte UTF-8 range: U+20AC must decode to the euro sign '€'
var euro_sign = from_json("\"\\u20AC\"")
assert_equal(euro_sign, "\u20AC")

View File

@ -0,0 +1,2 @@
// \u escape surrounded by ordinary characters: \u0057 ('W') completes the word "World"
var greeting = from_json("\"Hello \\u0057orld\"")
assert_equal(greeting, "Hello World")

View File

@ -0,0 +1,2 @@
// Several consecutive \u escapes in one string: H, e, l, l, o spelled entirely as escapes
var spelled_out = from_json("\"\\u0048\\u0065\\u006C\\u006C\\u006F\"")
assert_equal(spelled_out, "Hello")

View File

@ -0,0 +1,2 @@
// \u escape written with uppercase hex digits (U+00C4 = 'Ä')
var two_byte_upper = from_json("\"\\u00C4\"")
assert_equal(two_byte_upper, "\u00C4")

View File

@ -0,0 +1,2 @@
// Test JSON \u escape: escape embedded directly between literal text (U+0041 = 'A')
// NOTE(review): this file was previously labelled as a null character (U+0000) edge case,
// but the assertion below decodes U+0041; no U+0000 input is exercised here.
assert_equal(from_json("\"before\\u0041after\""), "beforeAafter")

View File

@ -0,0 +1,3 @@
// \u escapes inside an object member: the value of "key" is three 2-byte characters (Ä, Ö, Ü)
var parsed_object = from_json("{\"key\": \"\\u00C4\\u00D6\\u00DC\"}")
var key_value = parsed_object["key"]
assert_equal(key_value, "\u00C4\u00D6\u00DC")