From f37d0e13d3ca701ceab4ed4f900ef24c50efb1bb Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Thu, 10 May 2018 17:44:06 +0200 Subject: [PATCH 01/26] Skip UTF-8 BOM before parsing begins. --- include/chaiscript/language/chaiscript_parser.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index c82aff38..417e876d 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -2569,6 +2569,13 @@ namespace chaiscript } } + //skip UTF-8 BOM + if ((t_input.size() > 3) && (t_input[0] == '\xef') && (t_input[1] == '\xbb' && t_input[2] == '\xbf')) { + while(m_position.has_more() && (m_position.col < 4)) { + ++m_position; + } + } + if (Statements(true)) { if (m_position.has_more()) { throw exception::eval_error("Unparsed input", File_Position(m_position.line, m_position.col), *m_filename); From 1d782338c9904b3173d330686d8751b37f7c08c0 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Thu, 10 May 2018 18:23:39 +0200 Subject: [PATCH 02/26] Cover skipping BOM with test. --- unittests/compiled_tests.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp index a39c3f8d..dc4a1f24 100644 --- a/unittests/compiled_tests.cpp +++ b/unittests/compiled_tests.cpp @@ -352,6 +352,18 @@ TEST_CASE("Functor cast") CHECK(d == 3 * 6); } +TEST_CASE("BOM at beginning of string") +{ + chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); + + chai.add(chaiscript::fun(&functor_cast_test_call), "test_call"); + + chai.eval("def func() { return \"Hello World\"; };"); + + std::string result = chai.eval("\xef\xbb\xbf(func())"); + + CHECK(result.compare(std::string("Hello World")) == 0); +} int set_state_test_myfun() From 1e8f7f9fa5f2a896462f5487d2d202619f26ac06 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Thu, 10 May 2018 18:40:56 +0200 Subject: [PATCH 03/26] Simplify BOM test. --- unittests/compiled_tests.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp index dc4a1f24..18cf9202 100644 --- a/unittests/compiled_tests.cpp +++ b/unittests/compiled_tests.cpp @@ -360,9 +360,7 @@ TEST_CASE("BOM at beginning of string") chai.eval("def func() { return \"Hello World\"; };"); - std::string result = chai.eval("\xef\xbb\xbf(func())"); - - CHECK(result.compare(std::string("Hello World")) == 0); + CHECK(chai.eval("\xef\xbb\xbf(func())") == "Hello World"); } From efbebee9da6c42c26c8d102c25a49532e05cfc46 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Sun, 13 May 2018 10:25:04 +0200 Subject: [PATCH 04/26] Throw exception when user-provided input contains BOM. --- .../chaiscript/language/chaiscript_parser.hpp | 17 ++++++++++------- unittests/compiled_tests.cpp | 7 +------ unittests/eval_file_with_bom.chai | 2 ++ unittests/file_with_bom.inc | 3 +++ 4 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 unittests/eval_file_with_bom.chai create mode 100644 unittests/file_with_bom.inc diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 417e876d..777dfa7f 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -114,6 +114,12 @@ namespace chaiscript // little SFINAE trick to avoid base class return Char_Parser_Helper::u8str_from_ll(val); } + + static bool has_utf8_bom(const std::string &t_input) + { + //skip UTF-8 BOM + return ((t_input.size() > 3) && (t_input[0] == '\xef') && (t_input[1] == '\xbb' && t_input[2] == '\xbf')); + } }; } @@ -2562,6 +2568,10 @@ namespace chaiscript AST_NodePtr parse_internal(const std::string &t_input, std::string t_fname) { m_position = Position(t_input.begin(), t_input.end()); m_filename = std::make_shared(std::move(t_fname)); + + if (detail::Char_Parser_Helper::has_utf8_bom(t_input)) { + throw exception::eval_error("UTF-8 in user provided input!"); + } if ((t_input.size() > 1) && (t_input[0] == '#') && (t_input[1] == '!')) { while (m_position.has_more() && (!Eol())) { @@ -2569,13 +2579,6 @@ namespace chaiscript } } - //skip UTF-8 BOM - if ((t_input.size() > 3) && (t_input[0] == '\xef') && (t_input[1] == '\xbb' && t_input[2] == '\xbf')) { - while(m_position.has_more() && (m_position.col < 4)) { - ++m_position; - } - } - if (Statements(true)) { if (m_position.has_more()) { throw exception::eval_error("Unparsed input", File_Position(m_position.line, m_position.col), *m_filename); diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp index 18cf9202..66d804f7 100644 --- a/unittests/compiled_tests.cpp +++ b/unittests/compiled_tests.cpp @@ -355,12 +355,7 @@ TEST_CASE("Functor cast") TEST_CASE("BOM at beginning of string") { chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); - - chai.add(chaiscript::fun(&functor_cast_test_call), "test_call"); - - chai.eval("def func() { return \"Hello World\"; };"); - - CHECK(chai.eval("\xef\xbb\xbf(func())") == "Hello World"); + CHECK_THROWS_AS(chai.eval("\xef\xbb\xbfprint \"Hello World\""), chaiscript::exception::eval_error); } diff --git a/unittests/eval_file_with_bom.chai b/unittests/eval_file_with_bom.chai new file mode 100644 index 00000000..b17ddba9 --- /dev/null +++ b/unittests/eval_file_with_bom.chai @@ -0,0 +1,2 @@ +eval_file("file_with_bom.inc") +assert_true(alwaysTrue()) \ No newline at end of file diff --git a/unittests/file_with_bom.inc b/unittests/file_with_bom.inc new file mode 100644 index 00000000..e7ad9558 --- /dev/null +++ b/unittests/file_with_bom.inc @@ -0,0 +1,3 @@ +def alwaysTrue() { + return true +} \ No newline at end of file From a024db040d4b1d51dcd8c3dc166f2d325683c758 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Sun, 13 May 2018 12:24:34 +0200 Subject: [PATCH 05/26] Catch BOM at the beginning of file. --- .../chaiscript/language/chaiscript_engine.hpp | 20 +++++++++++++++++++ .../chaiscript/language/chaiscript_parser.hpp | 6 +++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index fff85873..fa200ce4 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -52,6 +52,7 @@ #include "../dispatchkit/exception_specification.hpp" +#include "chaiscript_parser.hpp" namespace chaiscript { @@ -204,6 +205,23 @@ namespace chaiscript m_engine.add(fun([this](const std::string& t_namespace_name) { import(t_namespace_name); }), "import"); } + /// Skip BOM at the beginning of file + static bool skip_bom(std::ifstream &infile) { + char buffer[4]; + + memset(buffer, '\0', 4); + infile.readsome(buffer, 3); + std::string buffer_string(buffer); + + if (chaiscript::parser::detail::Char_Parser_Helper::has_utf8_bom(buffer_string)) { + infile.seekg(3); + return true; + } + + infile.seekg(0); + + return false; + } /// Helper function for loading a file static std::string load_file(const std::string &t_filename) { @@ -218,6 +236,8 @@ namespace chaiscript assert(size >= 0); + skip_bom(infile); + if (size == std::streampos(0)) { return std::string(); diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 777dfa7f..44edc0b6 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -114,11 +114,11 @@ namespace chaiscript // little SFINAE trick to avoid base class return Char_Parser_Helper::u8str_from_ll(val); } - + static bool has_utf8_bom(const std::string &t_input) { //skip UTF-8 BOM - return ((t_input.size() > 3) && (t_input[0] == '\xef') && (t_input[1] == '\xbb' && t_input[2] == '\xbf')); + return ((t_input.size() > 2) && (t_input[0] == '\xef') && (t_input[1] == '\xbb' && t_input[2] == '\xbf')); } }; } @@ -2568,7 +2568,7 @@ namespace chaiscript AST_NodePtr parse_internal(const std::string &t_input, std::string t_fname) { m_position = Position(t_input.begin(), t_input.end()); m_filename = std::make_shared(std::move(t_fname)); - + if (detail::Char_Parser_Helper::has_utf8_bom(t_input)) { throw exception::eval_error("UTF-8 in user provided input!"); } From c09af929637ced3d3e44bb3ea498ee5d53a73a0d Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Sun, 13 May 2018 13:09:38 +0200 Subject: [PATCH 06/26] Decrement file size when BOM is present to avoid parsing errors. --- include/chaiscript/language/chaiscript_engine.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index fa200ce4..640d217e 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -207,11 +207,11 @@ namespace chaiscript /// Skip BOM at the beginning of file static bool skip_bom(std::ifstream &infile) { - char buffer[4]; + size_t bytes_needed = 3; + std::vector v(bytes_needed); - memset(buffer, '\0', 4); - infile.readsome(buffer, 3); - std::string buffer_string(buffer); + infile.read(&v[0], static_cast(bytes_needed)); + std::string buffer_string(v.begin(), v.end()); if (chaiscript::parser::detail::Char_Parser_Helper::has_utf8_bom(buffer_string)) { infile.seekg(3); @@ -231,12 +231,14 @@ namespace chaiscript throw chaiscript::exception::file_not_found_error(t_filename); } - const auto size = infile.tellg(); + auto size = infile.tellg(); infile.seekg(0, std::ios::beg); assert(size >= 0); - skip_bom(infile); + if (skip_bom(infile)) { + size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors + } if (size == std::streampos(0)) { From 322568ba3971481828a2c4ead4e285402a39e586 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Tue, 15 May 2018 19:25:28 +0200 Subject: [PATCH 07/26] Check for illegal characters while parsing input. --- include/chaiscript/language/chaiscript_parser.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 44edc0b6..52bff8d0 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -530,6 +530,9 @@ namespace chaiscript bool retval = false; while (m_position.has_more()) { + if(static_cast(*m_position) > 0x7e) { + throw exception::eval_error("Illegal character", File_Position(m_position.line, m_position.col), *m_filename); + } auto end_line = (*m_position != 0) && ((*m_position == '\n') || (*m_position == '\r' && *(m_position+1) == '\n')); if ( char_in_alphabet(*m_position,detail::white_alphabet) || (skip_cr && end_line)) { @@ -2569,9 +2572,9 @@ namespace chaiscript m_position = Position(t_input.begin(), t_input.end()); m_filename = std::make_shared(std::move(t_fname)); - if (detail::Char_Parser_Helper::has_utf8_bom(t_input)) { - throw exception::eval_error("UTF-8 in user provided input!"); - } + //if (detail::Char_Parser_Helper::has_utf8_bom(t_input)) { + // throw exception::eval_error("UTF-8 in user provided input!"); + //} if ((t_input.size() > 1) && (t_input[0] == '#') && (t_input[1] == '!')) { while (m_position.has_more() && (!Eol())) { From 0d44b0b45626344b9470b58063268ef80d51cb0f Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Tue, 15 May 2018 19:32:17 +0200 Subject: [PATCH 08/26] Added doc comment. --- include/chaiscript/language/chaiscript_parser.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 52bff8d0..26152abd 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -526,6 +526,7 @@ namespace chaiscript /// Skips ChaiScript whitespace, which means space and tab, but not cr/lf /// jespada: Modified SkipWS to skip optionally CR ('\n') and/or LF+CR ("\r\n") + /// AlekMosingiewicz: Added exception when illegal character detected bool SkipWS(bool skip_cr=false) { bool retval = false; From 60c0a0bf1568669be0a5c143a868ee7ff89d25cd Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Mon, 21 May 2018 17:04:33 +0200 Subject: [PATCH 09/26] Refactor skippable BOM detection. --- include/chaiscript/language/chaiscript_engine.hpp | 7 +++++-- include/chaiscript/language/chaiscript_parser.hpp | 6 ------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 640d217e..f405aeab 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -52,7 +52,6 @@ #include "../dispatchkit/exception_specification.hpp" -#include "chaiscript_parser.hpp" namespace chaiscript { @@ -213,7 +212,11 @@ namespace chaiscript infile.read(&v[0], static_cast(bytes_needed)); std::string buffer_string(v.begin(), v.end()); - if (chaiscript::parser::detail::Char_Parser_Helper::has_utf8_bom(buffer_string)) { + if ((buffer_string.size() > 2) + && (buffer_string[0] == '\xef') + && (buffer_string[1] == '\xbb') + && (buffer_string[2] == '\xbf')) { + infile.seekg(3); return true; } diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 26152abd..b24d3179 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -114,12 +114,6 @@ namespace chaiscript // little SFINAE trick to avoid base class return Char_Parser_Helper::u8str_from_ll(val); } - - static bool has_utf8_bom(const std::string &t_input) - { - //skip UTF-8 BOM - return ((t_input.size() > 2) && (t_input[0] == '\xef') && (t_input[1] == '\xbb' && t_input[2] == '\xbf')); - } }; } From b70a9e7a614e224ce68cda7fc25f10198ecc84a9 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Mon, 21 May 2018 17:12:11 +0200 Subject: [PATCH 10/26] Non-ASCII characters now in random positions in test; test renamed. --- unittests/compiled_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp index 66d804f7..4bbfc9ff 100644 --- a/unittests/compiled_tests.cpp +++ b/unittests/compiled_tests.cpp @@ -352,10 +352,10 @@ TEST_CASE("Functor cast") CHECK(d == 3 * 6); } -TEST_CASE("BOM at beginning of string") +TEST_CASE("Non-ASCII characters in string") { chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); - CHECK_THROWS_AS(chai.eval("\xef\xbb\xbfprint \"Hello World\""), chaiscript::exception::eval_error); + CHECK_THROWS_AS(chai.eval("prin\xeft \"Hello World\""), chaiscript::exception::eval_error); } From d880d46214311f15536105116261ec01459b1f8c Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Tue, 22 May 2018 16:23:22 +0200 Subject: [PATCH 11/26] Type cast fix. --- include/chaiscript/language/chaiscript_parser.hpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/chaiscript/language/chaiscript_parser.hpp b/include/chaiscript/language/chaiscript_parser.hpp index 48961250..cc296454 100644 --- a/include/chaiscript/language/chaiscript_parser.hpp +++ b/include/chaiscript/language/chaiscript_parser.hpp @@ -525,7 +525,7 @@ namespace chaiscript bool retval = false; while (m_position.has_more()) { - if(static_cast(*m_position) > 0x7e) { + if(static_cast(*m_position) > 0x7e) { throw exception::eval_error("Illegal character", File_Position(m_position.line, m_position.col), *m_filename); } auto end_line = (*m_position != 0) && ((*m_position == '\n') || (*m_position == '\r' && *(m_position+1) == '\n')); @@ -2599,10 +2599,6 @@ namespace chaiscript m_position = Position(t_input.begin(), t_input.end()); m_filename = std::make_shared(std::move(t_fname)); - //if (detail::Char_Parser_Helper::has_utf8_bom(t_input)) { - // throw exception::eval_error("UTF-8 in user provided input!"); - //} - if ((t_input.size() > 1) && (t_input[0] == '#') && (t_input[1] == '!')) { while (m_position.has_more() && (!Eol())) { ++m_position; From f9615efea54ab0886b803592139e8919b25e0367 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Tue, 22 May 2018 16:27:19 +0200 Subject: [PATCH 12/26] Another text size assertion. --- include/chaiscript/language/chaiscript_engine.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index f405aeab..0c18f783 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -241,6 +241,7 @@ namespace chaiscript if (skip_bom(infile)) { size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors + assert(size >=0 ); //and check if there's more text } if (size == std::streampos(0)) From df6bc8f9b5e35e7c24b5772fe3c3efd4f40c4a1a Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Tue, 22 May 2018 17:07:32 +0200 Subject: [PATCH 13/26] Add missing test cases. --- unittests/compiled_tests.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp index e61f9e9e..1fda793f 100644 --- a/unittests/compiled_tests.cpp +++ b/unittests/compiled_tests.cpp @@ -352,12 +352,23 @@ TEST_CASE("Functor cast") CHECK(d == 3 * 6); } -TEST_CASE("Non-ASCII characters in string") +TEST_CASE("Non-ASCII characters in the middle of string") { chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); CHECK_THROWS_AS(chai.eval("prin\xeft \"Hello World\""), chaiscript::exception::eval_error); } +TEST_CASE("Non-ASCII characters in the beginning of string") +{ + chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); + CHECK_THROWS_AS(chai.eval("\xefprint \"Hello World\""), chaiscript::exception::eval_error); +} + +TEST_CASE("Non-ASCII characters in the end of string") +{ + chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); + CHECK_THROWS_AS(chai.eval("print \"Hello World\"\xef"), chaiscript::exception::eval_error); +} int set_state_test_myfun() { From 67dcd3e8d8ab474873999cf868985651c9980e7a Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Tue, 22 May 2018 17:12:14 +0200 Subject: [PATCH 14/26] Test case for BOM in user-provided string. --- unittests/compiled_tests.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unittests/compiled_tests.cpp b/unittests/compiled_tests.cpp index 1fda793f..8723d23d 100644 --- a/unittests/compiled_tests.cpp +++ b/unittests/compiled_tests.cpp @@ -370,6 +370,12 @@ TEST_CASE("Non-ASCII characters in the end of string") CHECK_THROWS_AS(chai.eval("print \"Hello World\"\xef"), chaiscript::exception::eval_error); } +TEST_CASE("BOM in string") +{ + chaiscript::ChaiScript_Basic chai(create_chaiscript_stdlib(),create_chaiscript_parser()); + CHECK_THROWS_AS(chai.eval("\xef\xbb\xbfprint \"Hello World\""), chaiscript::exception::eval_error); +} + int set_state_test_myfun() { return 2; From 4ada12a34c39739cc926d7931a650002cdd319a2 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Wed, 23 May 2018 18:41:07 +0200 Subject: [PATCH 15/26] Check EOF rather than buffer_size when skipping BOM. --- include/chaiscript/language/chaiscript_engine.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 0c18f783..a8f35007 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -212,7 +212,7 @@ namespace chaiscript infile.read(&v[0], static_cast(bytes_needed)); std::string buffer_string(v.begin(), v.end()); - if ((buffer_string.size() > 2) + if (!infile.eof() && (buffer_string[0] == '\xef') && (buffer_string[1] == '\xbb') && (buffer_string[2] == '\xbf')) { From ac10575b5f2e9657bb9ba24bf45579cdc5803a94 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Thu, 24 May 2018 21:38:47 +0200 Subject: [PATCH 16/26] Read the stream byte by byte, condition for size when skipping BOM. --- .../chaiscript/language/chaiscript_engine.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index a8f35007..1c2d22e6 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -207,15 +207,18 @@ namespace chaiscript /// Skip BOM at the beginning of file static bool skip_bom(std::ifstream &infile) { size_t bytes_needed = 3; - std::vector v(bytes_needed); + size_t bytes_read = 0; + char buffer[256]; - infile.read(&v[0], static_cast(bytes_needed)); - std::string buffer_string(v.begin(), v.end()); + while (bytes_read < bytes_needed) { + infile >> buffer; + bytes_read++; + } - if (!infile.eof() - && (buffer_string[0] == '\xef') - && (buffer_string[1] == '\xbb') - && (buffer_string[2] == '\xbf')) { + if (bytes_needed == bytes_read + && (buffer[0] == '\xef') + && (buffer[1] == '\xbb') + && (buffer[2] == '\xbf')) { infile.seekg(3); return true; @@ -239,7 +242,7 @@ namespace chaiscript assert(size >= 0); - if (skip_bom(infile)) { + if (size >= 3 && skip_bom(infile)) { size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors assert(size >=0 ); //and check if there's more text } From edadb7aa98eb7c713b943aaae142f2bd6dbd7e81 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Thu, 24 May 2018 22:04:10 +0200 Subject: [PATCH 17/26] Use readsome instead of reading the stream byte-by-byte to detect BOM in processed file. --- include/chaiscript/language/chaiscript_engine.hpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 1c2d22e6..8ff6875e 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -206,14 +206,11 @@ namespace chaiscript /// Skip BOM at the beginning of file static bool skip_bom(std::ifstream &infile) { - size_t bytes_needed = 3; - size_t bytes_read = 0; - char buffer[256]; + std::streamsize bytes_needed = 3; + std::streamsize bytes_read = 0; + char buffer[3]; - while (bytes_read < bytes_needed) { - infile >> buffer; - bytes_read++; - } + bytes_read = infile.readsome(buffer, bytes_needed); if (bytes_needed == bytes_read && (buffer[0] == '\xef') @@ -242,7 +239,7 @@ namespace chaiscript assert(size >= 0); - if (size >= 3 && skip_bom(infile)) { + if (skip_bom(infile)) { size-=3; // decrement the BOM size from file size, otherwise we'll get parsing errors assert(size >=0 ); //and check if there's more text } From 51bb7936642f1c1bd06432ad821c253e1536099c Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Thu, 24 May 2018 22:06:59 +0200 Subject: [PATCH 18/26] Initialize buffer to store potential BOM data before storing anything inside it. --- include/chaiscript/language/chaiscript_engine.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 8ff6875e..a642f249 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -208,7 +208,7 @@ namespace chaiscript static bool skip_bom(std::ifstream &infile) { std::streamsize bytes_needed = 3; std::streamsize bytes_read = 0; - char buffer[3]; + char buffer[3] = { '\0' }; bytes_read = infile.readsome(buffer, bytes_needed); From 51693aa0bd91e3133029687d83ffa662b9177f2f Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 06:57:22 +0200 Subject: [PATCH 19/26] Skip buffer initialization. --- include/chaiscript/language/chaiscript_engine.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index a642f249..8ff6875e 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -208,7 +208,7 @@ namespace chaiscript static bool skip_bom(std::ifstream &infile) { std::streamsize bytes_needed = 3; std::streamsize bytes_read = 0; - char buffer[3] = { '\0' }; + char buffer[3]; bytes_read = infile.readsome(buffer, bytes_needed); From 0e964da42658a4e7e5bebe281f2c125575f34246 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 07:59:34 +0200 Subject: [PATCH 20/26] Attempt to remedy the problem occuring on Clang. --- include/chaiscript/language/chaiscript_engine.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 8ff6875e..a13d51f0 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -206,14 +206,14 @@ namespace chaiscript /// Skip BOM at the beginning of file static bool skip_bom(std::ifstream &infile) { - std::streamsize bytes_needed = 3; - std::streamsize bytes_read = 0; + size_t bytes_needed = 3; char buffer[3]; - bytes_read = infile.readsome(buffer, bytes_needed); + memset(buffer, '\0', bytes_needed); - if (bytes_needed == bytes_read - && (buffer[0] == '\xef') + infile.readsome(buffer, bytes_needed); + + if ((buffer[0] == '\xef') && (buffer[1] == '\xbb') && (buffer[2] == '\xbf')) { From 42c355a8d0d9a9d014f3205fb474989bb706de50 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 08:06:35 +0200 Subject: [PATCH 21/26] Revert "Attempt to remedy the problem occuring on Clang." This reverts commit 0e964da42658a4e7e5bebe281f2c125575f34246. --- include/chaiscript/language/chaiscript_engine.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index a13d51f0..8ff6875e 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -206,14 +206,14 @@ namespace chaiscript /// Skip BOM at the beginning of file static bool skip_bom(std::ifstream &infile) { - size_t bytes_needed = 3; + std::streamsize bytes_needed = 3; + std::streamsize bytes_read = 0; char buffer[3]; - memset(buffer, '\0', bytes_needed); + bytes_read = infile.readsome(buffer, bytes_needed); - infile.readsome(buffer, bytes_needed); - - if ((buffer[0] == '\xef') + if (bytes_needed == bytes_read + && (buffer[0] == '\xef') && (buffer[1] == '\xbb') && (buffer[2] == '\xbf')) { From 1711d50effc6ac508bd825df042fef4f4ce5d07e Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 08:38:25 +0200 Subject: [PATCH 22/26] Another attempt to remedy the problem occuring on Clang. --- include/chaiscript/language/chaiscript_engine.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 8ff6875e..34f48a1e 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -210,7 +210,7 @@ namespace chaiscript std::streamsize bytes_read = 0; char buffer[3]; - bytes_read = infile.readsome(buffer, bytes_needed); + bytes_read = infile.readsome(&buffer[0], bytes_needed); if (bytes_needed == bytes_read && (buffer[0] == '\xef') From 393f8d31ab6975e353fa9f69286b4e84c392f7aa Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 11:48:27 +0200 Subject: [PATCH 23/26] Travis build quick fix. --- include/chaiscript/language/chaiscript_engine.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 34f48a1e..629a9245 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -208,9 +208,9 @@ namespace chaiscript static bool skip_bom(std::ifstream &infile) { std::streamsize bytes_needed = 3; std::streamsize bytes_read = 0; - char buffer[3]; + char buffer[4]; - bytes_read = infile.readsome(&buffer[0], bytes_needed); + bytes_read = infile.readsome(buffer, bytes_needed); if (bytes_needed == bytes_read && (buffer[0] == '\xef') From fb635033a938ecc600b09b3475a56a54c41f4e9c Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 11:56:10 +0200 Subject: [PATCH 24/26] Fix for Clang. --- include/chaiscript/language/chaiscript_engine.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 629a9245..9b2384b1 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -212,7 +212,7 @@ namespace chaiscript bytes_read = infile.readsome(buffer, bytes_needed); - if (bytes_needed == bytes_read + if (!bytes_needed < bytes_read && (buffer[0] == '\xef') && (buffer[1] == '\xbb') && (buffer[2] == '\xbf')) { From 0f67b2f43077d1dc2cd71478ed31fac7b1184fad Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 12:07:50 +0200 Subject: [PATCH 25/26] Another fix for Clang. --- include/chaiscript/language/chaiscript_engine.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index 9b2384b1..e903dbde 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -206,14 +206,14 @@ namespace chaiscript /// Skip BOM at the beginning of file static bool skip_bom(std::ifstream &infile) { - std::streamsize bytes_needed = 3; - std::streamsize bytes_read = 0; - char buffer[4]; + size_t bytes_needed = 3; + char buffer[3]; - bytes_read = infile.readsome(buffer, bytes_needed); + memset(buffer, '\0', bytes_needed); - if (!bytes_needed < bytes_read - && (buffer[0] == '\xef') + infile.read(buffer, bytes_needed); + + if ((buffer[0] == '\xef') && (buffer[1] == '\xbb') && (buffer[2] == '\xbf')) { From b3f77f0c8282295809a2e5df9698e32badcd8fa6 Mon Sep 17 00:00:00 2001 From: Alek Mosingiewicz Date: Fri, 25 May 2018 12:17:22 +0200 Subject: [PATCH 26/26] Fix implicit conversion warning. --- include/chaiscript/language/chaiscript_engine.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/chaiscript/language/chaiscript_engine.hpp b/include/chaiscript/language/chaiscript_engine.hpp index e903dbde..889955c2 100644 --- a/include/chaiscript/language/chaiscript_engine.hpp +++ b/include/chaiscript/language/chaiscript_engine.hpp @@ -211,7 +211,7 @@ namespace chaiscript memset(buffer, '\0', bytes_needed); - infile.read(buffer, bytes_needed); + infile.read(buffer, static_cast(bytes_needed)); if ((buffer[0] == '\xef') && (buffer[1] == '\xbb')