diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp
index fff85873..889955c2 100644
--- a/include/chaiscript/language/chaiscript_engine.hpp
+++ b/include/chaiscript/language/chaiscript_engine.hpp
@@ -204,6 +204,34 @@ namespace chaiscript
       m_engine.add(fun([this](const std::string& t_namespace_name) { import(t_namespace_name); }), "import");
     }
 
+    /// Skip a UTF-8 byte order mark (EF BB BF) at the beginning of a file.
+    /// \param infile stream to inspect; on return it is positioned just past
+    ///               the BOM if one was found, otherwise at offset 0
+    /// \return true if a BOM was found and skipped, false otherwise
+    static bool skip_bom(std::ifstream &infile) {
+      size_t bytes_needed = 3;
+      char buffer[3];
+
+      memset(buffer, '\0', bytes_needed);
+
+      infile.read(buffer, static_cast<std::streamsize>(bytes_needed));
+
+      if ((buffer[0] == '\xef')
+        && (buffer[1] == '\xbb')
+        && (buffer[2] == '\xbf')) {
+
+        infile.seekg(3);
+        return true;
+      }
+
+      // A file shorter than three bytes makes read() set failbit/eofbit, and
+      // seekg() is a no-op on a failed stream; reset the state so the rewind
+      // takes effect and the caller can still read the content.
+      infile.clear();
+      infile.seekg(0);
+
+      return false;
+    }
 
     /// Helper function for loading a file
     static std::string load_file(const std::string &t_filename) {
@@ -213,11 +241,16 @@ namespace chaiscript
         throw chaiscript::exception::file_not_found_error(t_filename);
       }
 
-      const auto size = infile.tellg();
+      auto size = infile.tellg();
       infile.seekg(0, std::ios::beg);
 
       assert(size >= 0);
 
+      if (skip_bom(infile)) {
+        size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors
+        assert(size >=0 ); //and check if there's more text
+      }
+
       if (size == std::streampos(0))
       {
         return std::string();
diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp
index 97f59098..919fea86 100644
--- a/include/chaiscript/language/chaiscript_parser.hpp
+++ b/include/chaiscript/language/chaiscript_parser.hpp
@@ -542,10 +542,14 @@ namespace chaiscript
 
       /// Skips ChaiScript whitespace, which means space and tab, but not cr/lf
       /// jespada: Modified SkipWS to skip optionally CR ('\n') and/or LF+CR ("\r\n")
+      /// AlekMosingiewicz: Added exception when illegal character detected
       bool SkipWS(bool skip_cr=false) {
         bool retval = false;
 
         while (m_position.has_more()) {
+          if(static_cast<unsigned char>(*m_position) > 0x7e) {
+            throw exception::eval_error("Illegal character", File_Position(m_position.line, m_position.col), *m_filename);
+          }
           auto end_line = (*m_position != 0) && ((*m_position == '\n') || (*m_position == '\r' && *(m_position+1) == '\n'));
 
           if ( char_in_alphabet(*m_position,detail::white_alphabet) || (skip_cr && end_line)) {
diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp
index 3df1dbe2..8723d23d 100644
--- a/unittests/compiled_tests.cpp
+++ b/unittests/compiled_tests.cpp
@@ -352,7 +352,29 @@ TEST_CASE("Functor cast")
   CHECK(d == 3 * 6);
 }
 
+TEST_CASE("Non-ASCII characters in the middle of string")
+{
+  chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser());
+  CHECK_THROWS_AS(chai.eval("prin\xeft \"Hello World\""), chaiscript::exception::eval_error);
+}
 
+TEST_CASE("Non-ASCII characters in the beginning of string")
+{
+  chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser());
+  CHECK_THROWS_AS(chai.eval("\xefprint \"Hello World\""), chaiscript::exception::eval_error);
+}
+
+TEST_CASE("Non-ASCII characters in the end of string")
+{
+  chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser());
+  CHECK_THROWS_AS(chai.eval("print \"Hello World\"\xef"), chaiscript::exception::eval_error);
+}
+
+TEST_CASE("BOM in string")
+{
+  chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser());
+  CHECK_THROWS_AS(chai.eval("\xef\xbb\xbfprint \"Hello World\""), chaiscript::exception::eval_error);
+}
 
 int set_state_test_myfun()
 {
diff --git a/unittests/eval_file_with_bom.chai b/unittests/eval_file_with_bom.chai
new file mode 100644
index 00000000..b17ddba9
--- /dev/null
+++ b/unittests/eval_file_with_bom.chai
@@ -0,0 +1,2 @@
+eval_file("file_with_bom.inc")
+assert_true(alwaysTrue())
\ No newline at end of file
diff --git a/unittests/file_with_bom.inc b/unittests/file_with_bom.inc
new file mode 100644
index 00000000..e7ad9558
--- /dev/null
+++ b/unittests/file_with_bom.inc
@@ -0,0 +1,3 @@
+def alwaysTrue() {
+  return true
+}
\ No newline at end of file