From f1e0fb35da3e4e916738d38e526d0be3702cb783 Mon Sep 17 00:00:00 2001 From: leftibot Date: Mon, 13 Apr 2026 19:30:57 -0600 Subject: [PATCH] Fix #206: Make internal string type a template parameter Add a StringType template parameter (defaulting to std::string) to ChaiScript_Parser, Bootstrap::bootstrap(), and Std_Lib::library(), flowing through to the ChaiScript_Impl convenience class. This allows users to instantiate ChaiScript with std::wstring (via ChaiScript_WString) or other string types. String literals, escape sequences (including unicode for wide chars via if constexpr), to_string conversions, and string operations all respect the parameterized type. Co-Authored-By: Claude Opus 4.6 (1M context) --- CMakeLists.txt | 4 + include/chaiscript/chaiscript.hpp | 12 +- include/chaiscript/chaiscript_stdlib.hpp | 5 +- include/chaiscript/dispatchkit/bootstrap.hpp | 25 +++- .../chaiscript/language/chaiscript_parser.hpp | 126 +++++++++++++----- unittests/string_type_param_test.cpp | 47 +++++++ 6 files changed, 172 insertions(+), 47 deletions(-) create mode 100644 unittests/string_type_param_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 30924841..9f133b56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -444,6 +444,10 @@ if(BUILD_TESTING) target_link_libraries(threading_config_test ${LIBS}) add_test(NAME Threading_Config_Test COMMAND threading_config_test) + add_executable(string_type_param_test unittests/string_type_param_test.cpp) + target_link_libraries(string_type_param_test ${LIBS}) + add_test(NAME String_Type_Param_Test COMMAND string_type_param_test) + install(TARGETS test_module RUNTIME DESTINATION bin LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/chaiscript") endif() endif() diff --git a/include/chaiscript/chaiscript.hpp b/include/chaiscript/chaiscript.hpp index de8c3288..55b28e13 100644 --- a/include/chaiscript/chaiscript.hpp +++ b/include/chaiscript/chaiscript.hpp @@ -820,20 +820,24 @@ #include "language/chaiscript_parser.hpp" namespace chaiscript { - class ChaiScript : public ChaiScript_Basic { + template + class ChaiScript_Impl : public ChaiScript_Basic { public: - ChaiScript(std::vector t_modulepaths = {}, + ChaiScript_Impl(std::vector t_modulepaths = {}, std::vector t_usepaths = {}, std::vector t_opts = chaiscript::default_options(), std::vector t_lib_opts = {}) - : ChaiScript_Basic(chaiscript::Std_Lib::library(t_lib_opts), - std::make_unique>(), + : ChaiScript_Basic(chaiscript::Std_Lib::library(t_lib_opts), + std::make_unique>(), std::move(t_modulepaths), std::move(t_usepaths), std::move(t_opts), std::find(t_lib_opts.begin(), t_lib_opts.end(), Library_Options::No_IO) != t_lib_opts.end()) { } }; + + using ChaiScript = ChaiScript_Impl; + using ChaiScript_WString = ChaiScript_Impl; } // namespace chaiscript #endif /* CHAISCRIPT_HPP_ */ diff --git a/include/chaiscript/chaiscript_stdlib.hpp b/include/chaiscript/chaiscript_stdlib.hpp index 73b11542..2787f19a 100644 --- a/include/chaiscript/chaiscript_stdlib.hpp +++ b/include/chaiscript/chaiscript_stdlib.hpp @@ -38,6 +38,7 @@ namespace chaiscript { class Std_Lib { public: + template [[nodiscard]] static ModulePtr library(const std::vector &t_opts = {}) { if (std::find(t_opts.begin(), t_opts.end(), Library_Options::No_Stdlib) != t_opts.end()) { return std::make_shared(); @@ -49,10 +50,10 @@ namespace chaiscript { const bool no_prelude = std::find(t_opts.begin(), t_opts.end(), Library_Options::No_Prelude) != t_opts.end(); const bool no_json = std::find(t_opts.begin(), t_opts.end(), Library_Options::No_JSON) != t_opts.end(); - bootstrap::Bootstrap::bootstrap(*lib, no_io); + bootstrap::Bootstrap::bootstrap(*lib, no_io); bootstrap::standard_library::vector_type>("Vector", *lib); - bootstrap::standard_library::string_type("string", *lib); + bootstrap::standard_library::string_type("string", *lib); bootstrap::standard_library::map_type>("Map", *lib); bootstrap::standard_library::pair_type>("Pair", *lib); diff --git a/include/chaiscript/dispatchkit/bootstrap.hpp b/include/chaiscript/dispatchkit/bootstrap.hpp index 041af80e..167321f2 100644 --- a/include/chaiscript/dispatchkit/bootstrap.hpp +++ b/include/chaiscript/dispatchkit/bootstrap.hpp @@ -10,6 +10,8 @@ #ifndef CHAISCRIPT_BOOTSTRAP_HPP_ #define CHAISCRIPT_BOOTSTRAP_HPP_ +#include + #include "../utility/utility.hpp" #include "register_function.hpp" @@ -269,6 +271,7 @@ namespace chaiscript::bootstrap { /// \brief perform all common bootstrap functions for std::string, void and POD types /// \param[in,out] m Module to add bootstrapped functions to /// \param[in] t_no_io If true, skip registering print_string and println_string + template static void bootstrap(Module &m, const bool t_no_io = false) { m.add(user_type(), "void"); m.add(user_type(), "bool"); @@ -393,13 +396,27 @@ namespace chaiscript::bootstrap { operators::equal(m); operators::not_equal(m); - m.add(fun([](const std::string &s) { return s; }), "to_string"); - m.add(fun([](const bool b) { return std::string(b ? "true" : "false"); }), "to_string"); + m.add(fun([](const StringType &s) { return s; }), "to_string"); + m.add(fun([](const bool b) -> StringType { + if constexpr (std::is_same_v) { + return b ? "true" : "false"; + } else { + const auto s = std::string(b ? "true" : "false"); + return StringType(s.begin(), s.end()); + } + }), "to_string"); m.add(fun(&unknown_assign), "="); m.add(fun([](const Boxed_Value &bv) { throw bv; }), "throw"); - m.add(fun([](const char c) { return std::string(1, c); }), "to_string"); - m.add(fun(&Boxed_Number::to_string), "to_string"); + m.add(fun([](const typename StringType::value_type c) -> StringType { return StringType(1, c); }), "to_string"); + if constexpr (std::is_same_v) { + m.add(fun(&Boxed_Number::to_string), "to_string"); + } else { + m.add(fun([](const Boxed_Number &n) -> StringType { + const auto s = n.to_string(); + return StringType(s.begin(), s.end()); + }), "to_string"); + } bootstrap_pod_type("double", m); bootstrap_pod_type("long_double", m); diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index be2a7525..5b2bad3e 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "../dispatchkit/boxed_value.hpp" @@ -101,9 +102,31 @@ namespace chaiscript { return Char_Parser_Helper::u8str_from_ll(val); } }; + template + int stoi_for_string(const S &s, std::size_t *pos, int base) { + if constexpr (std::is_same_v) { + return std::stoi(s, pos, base); + } else if constexpr (std::is_same_v) { + return std::stoi(std::wstring(s.begin(), s.end()), pos, base); + } else { + return std::stoi(std::string(s.begin(), s.end()), pos, base); + } + } + + template + long long stoll_for_string(const S &s, std::size_t *pos, int base) { + if constexpr (std::is_same_v) { + return std::stoll(s, pos, base); + } else if constexpr (std::is_same_v) { + return std::stoll(std::wstring(s.begin(), s.end()), pos, base); + } else { + return std::stoll(std::string(s.begin(), s.end()), pos, base); + } + } + } // namespace detail - template + template class ChaiScript_Parser final : public ChaiScript_Parser_Base { void *get_tracer_ptr() noexcept override { return &m_tracer; } @@ -812,6 +835,17 @@ namespace chaiscript { #endif } + template + static std::string to_narrow(const S &s) { + if constexpr (std::is_same_v) { + return s; + } else if constexpr (std::is_convertible_v) { + return std::string(s); + } else { + return std::string(s.begin(), s.end()); + } + } + template std::unique_ptr> make_node(std::string_view t_match, const int t_prev_line, const int t_prev_col, Param &&...param) { @@ -1100,8 +1134,8 @@ namespace chaiscript { void process_hex() { if (!hex_matches.empty()) { - auto val = stoll(hex_matches, nullptr, 16); - match.push_back(char_type(val)); + const auto val = detail::stoll_for_string(hex_matches, nullptr, 16); + match.push_back(static_cast(val)); } hex_matches.clear(); is_escaped = false; @@ -1110,8 +1144,8 @@ namespace chaiscript { void process_octal() { if (!octal_matches.empty()) { - auto val = stoll(octal_matches, nullptr, 8); - match.push_back(char_type(val)); + const auto val = detail::stoll_for_string(octal_matches, nullptr, 8); + match.push_back(static_cast(val)); } octal_matches.clear(); is_escaped = false; @@ -1119,14 +1153,13 @@ namespace chaiscript { } void process_unicode() { - const auto ch = static_cast(std::stoi(hex_matches, nullptr, 16)); + const auto ch = static_cast(detail::stoi_for_string(hex_matches, nullptr, 16)); const auto match_size = hex_matches.size(); hex_matches.clear(); is_escaped = false; const auto u_size = unicode_size; unicode_size = 0; - char buf[4]; if (u_size != match_size) { throw exception::eval_error("Incomplete unicode escape sequence"); } @@ -1134,26 +1167,44 @@ namespace chaiscript { throw exception::eval_error("Invalid 16 bit universal character"); } - if (ch < 0x80) { - match += static_cast(ch); - } else if (ch < 0x800) { - buf[0] = static_cast(0xC0 | (ch >> 6)); - buf[1] = static_cast(0x80 | (ch & 0x3F)); - match.append(buf, 2); - } else if (ch < 0x10000) { - buf[0] = static_cast(0xE0 | (ch >> 12)); - buf[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - buf[2] = static_cast(0x80 | (ch & 0x3F)); - match.append(buf, 3); - } else if (ch < 0x200000) { - buf[0] = static_cast(0xF0 | (ch >> 18)); - buf[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); - buf[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - buf[3] = static_cast(0x80 | (ch & 0x3F)); - match.append(buf, 4); + if constexpr (sizeof(char_type) >= 4) { + if (ch < 0x200000) { + match.push_back(static_cast(ch)); + } else { + throw exception::eval_error("Invalid 32 bit universal character"); + } + } else if constexpr (sizeof(char_type) >= 2) { + if (ch < 0x10000) { + match.push_back(static_cast(ch)); + } else if (ch < 0x110000) { + const auto adjusted = ch - 0x10000; + match.push_back(static_cast(0xD800 + (adjusted >> 10))); + match.push_back(static_cast(0xDC00 + (adjusted & 0x3FF))); + } else { + throw exception::eval_error("Invalid 32 bit universal character"); + } } else { - // this must be an invalid escape sequence? - throw exception::eval_error("Invalid 32 bit universal character"); + char buf[4]; + if (ch < 0x80) { + match += static_cast(ch); + } else if (ch < 0x800) { + buf[0] = static_cast(0xC0 | (ch >> 6)); + buf[1] = static_cast(0x80 | (ch & 0x3F)); + match.append(buf, 2); + } else if (ch < 0x10000) { + buf[0] = static_cast(0xE0 | (ch >> 12)); + buf[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + buf[2] = static_cast(0x80 | (ch & 0x3F)); + match.append(buf, 3); + } else if (ch < 0x200000) { + buf[0] = static_cast(0xF0 | (ch >> 18)); + buf[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); + buf[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); + buf[3] = static_cast(0x80 | (ch & 0x3F)); + match.append(buf, 4); + } else { + throw exception::eval_error("Invalid 32 bit universal character"); + } } } @@ -1280,11 +1331,11 @@ namespace chaiscript { const auto start = m_position; if (Quoted_String_()) { - std::string match; + StringType match; const auto prev_stack_top = m_match_stack.size(); bool is_interpolated = [&]() -> bool { - Char_Parser cparser(match, true); + Char_Parser cparser(match, true); auto s = start + 1, end = m_position - 1; @@ -1293,7 +1344,7 @@ namespace chaiscript { if (*s == '{') { // We've found an interpolation point - m_match_stack.push_back(make_node>(match, start.line, start.col, const_var(match))); + m_match_stack.push_back(make_node>(to_narrow(match), start.line, start.col, const_var(match))); if (cparser.is_interpolated) { // If we've seen previous interpolation, add on instead of making a new one @@ -1351,7 +1402,7 @@ namespace chaiscript { return cparser.is_interpolated; }(); - m_match_stack.push_back(make_node>(match, start.line, start.col, const_var(match))); + m_match_stack.push_back(make_node>(to_narrow(match), start.line, start.col, const_var(match))); if (is_interpolated) { build_match>(prev_stack_top, "+"); @@ -1439,7 +1490,7 @@ namespace chaiscript { close_seq += '"'; // Extract raw content up to closing sequence - std::string match; + StringType match; auto end = m_position; // m_position is already past the closing sequence // Content is from s up to (end - close_seq.size()) @@ -1456,11 +1507,11 @@ namespace chaiscript { break; } } - match.push_back(*s); + match.push_back(static_cast(*s)); ++s; } - m_match_stack.push_back(make_node>(match, start.line, start.col, const_var(match))); + m_match_stack.push_back(make_node>(to_narrow(match), start.line, start.col, const_var(match))); return true; } return false; @@ -1501,11 +1552,11 @@ namespace chaiscript { const auto start = m_position; if (Single_Quoted_String_()) { - std::string match; + StringType match; { // scope for cparser destructor - Char_Parser cparser(match, false); + Char_Parser cparser(match, false); for (auto s = start + 1, end = m_position - 1; s != end; ++s) { cparser.parse(*s, start.line, start.col, *m_filename); @@ -1518,7 +1569,8 @@ namespace chaiscript { *m_filename); } - m_match_stack.push_back(make_node>(match, start.line, start.col, const_var(char(match.at(0))))); + m_match_stack.push_back(make_node>(to_narrow(match), start.line, start.col, + const_var(static_cast(match.at(0))))); return true; } else { return false; @@ -2805,7 +2857,7 @@ namespace chaiscript { } AST_NodePtr parse(const std::string &t_input, const std::string &t_fname) override { - ChaiScript_Parser parser(m_tracer, m_optimizer); + ChaiScript_Parser parser(m_tracer, m_optimizer); return parser.parse_internal(t_input, t_fname); } diff --git a/unittests/string_type_param_test.cpp b/unittests/string_type_param_test.cpp new file mode 100644 index 00000000..416a6827 --- /dev/null +++ b/unittests/string_type_param_test.cpp @@ -0,0 +1,47 @@ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4062 4242 4566 4640 4702 6330 28251) +#endif + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +#pragma GCC diagnostic ignored "-Wparentheses" +#pragma GCC diagnostic ignored "-Wignored-qualifiers" +#endif + +#include +#include + +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + +TEST_CASE("String type can be parameterized to wstring") { + chaiscript::ChaiScript_WString chai; + + SECTION("String literals produce std::wstring") { + auto result = chai.eval("\"hello\""); + CHECK(result == L"hello"); + } + + SECTION("String concatenation works with wstring") { + auto result = chai.eval("\"hello\" + \" world\""); + CHECK(result == L"hello world"); + } + + SECTION("to_string works for numbers with wstring") { + auto result = chai.eval("to_string(42)"); + CHECK(result == L"42"); + } + + SECTION("String interpolation works with wstring") { + auto result = chai.eval("var x = 5; \"value: ${x}\""); + CHECK(result == L"value: 5"); + } + + SECTION("Default ChaiScript still uses std::string") { + chaiscript::ChaiScript default_chai; + auto result = default_chai.eval("\"hello\""); + CHECK(result == "hello"); + } +}