Changes to parser.

This commit is contained in:
clanmills 2011-02-14 10:33:40 -08:00
parent 80f576a2f3
commit 894063261e
2 changed files with 195 additions and 180 deletions

View File

@ -15,8 +15,25 @@
#include "chaiscript_prelude.hpp" #include "chaiscript_prelude.hpp"
#include "chaiscript_common.hpp" #include "chaiscript_common.hpp"
// Number of elements in the built-in array x, as an int.
// The argument is parenthesised so any array-valued expression indexes correctly,
// and the division is done in size_t before a single narrowing cast.
// Only meaningful for true arrays: a pointer argument silently yields a wrong count.
#define lengthof(x) (static_cast<int>(sizeof(x) / sizeof((x)[0])))
namespace chaiscript namespace chaiscript
{ {
/**
 * Selects one of the parser's character-class lookup tables.
 * Each enumerator other than max_alphabet names one table; max_alphabet is the
 * number of tables and is used as an array dimension, so it must stay last.
 */
enum Alphabet {
  symbol_alphabet = 0,
  keyword_alphabet,
  int_alphabet,
  float_alphabet,
  x_alphabet,
  hex_alphabet,
  b_alphabet,
  bin_alphabet,
  id_alphabet,
  white_alphabet,
  max_alphabet
};
class ChaiScript_Parser { class ChaiScript_Parser {
std::string::const_iterator m_input_pos, m_input_end; std::string::const_iterator m_input_pos, m_input_end;
@ -26,6 +43,7 @@ namespace chaiscript
std::string m_singleline_comment; std::string m_singleline_comment;
boost::shared_ptr<std::string> m_filename; boost::shared_ptr<std::string> m_filename;
std::vector<AST_NodePtr> m_match_stack; std::vector<AST_NodePtr> m_match_stack;
bool alphabet[max_alphabet][256];
std::vector<std::vector<std::string> > m_operator_matches; std::vector<std::vector<std::string> > m_operator_matches;
std::vector<AST_Node_Type::Type> m_operators; std::vector<AST_Node_Type::Type> m_operators;
@ -42,7 +60,7 @@ namespace chaiscript
ChaiScript_Parser(const ChaiScript_Parser &); // explicitly unimplemented copy constructor ChaiScript_Parser(const ChaiScript_Parser &); // explicitly unimplemented copy constructor
ChaiScript_Parser &operator=(const ChaiScript_Parser &); // explicitly unimplemented assignment operator ChaiScript_Parser &operator=(const ChaiScript_Parser &); // explicitly unimplemented assignment operator
void setup_operators() void setup_operators()
{ {
m_operators.push_back(AST_Node_Type::Logical_Or); m_operators.push_back(AST_Node_Type::Logical_Or);
std::vector<std::string> logical_or; std::vector<std::string> logical_or;
@ -106,7 +124,58 @@ namespace chaiscript
std::vector<std::string> dot_access; std::vector<std::string> dot_access;
dot_access.push_back("."); dot_access.push_back(".");
m_operator_matches.push_back(dot_access); m_operator_matches.push_back(dot_access);
// Build the character-class lookup tables: alphabet[a][c] is true when the
// character with value c belongs to class a.
int c;
// Start with every entry of every table cleared.
for ( c = 0 ; c < lengthof(alphabet[0]) ; c++ ) {
for ( int a = 0 ; a < max_alphabet ; a ++ ) {
alphabet[a][c]=false;
}
}
// symbol_alphabet: operator characters.
alphabet[symbol_alphabet]['+']=true;
alphabet[symbol_alphabet]['-']=true;
alphabet[symbol_alphabet]['*']=true;
alphabet[symbol_alphabet]['/']=true;
alphabet[symbol_alphabet]['|']=true;
alphabet[symbol_alphabet]['&']=true;
alphabet[symbol_alphabet]['^']=true;
alphabet[symbol_alphabet]['=']=true;
alphabet[symbol_alphabet]['.']=true;
alphabet[symbol_alphabet]['<']=true;
alphabet[symbol_alphabet]['>']=true;
// keyword_alphabet: ASCII letters, digits and underscore.
for ( c = 'a' ; c <= 'z' ; c++ ) alphabet[keyword_alphabet][c]=true;
for ( c = 'A' ; c <= 'Z' ; c++ ) alphabet[keyword_alphabet][c]=true;
for ( c = '0' ; c <= '9' ; c++ ) alphabet[keyword_alphabet][c]=true;
alphabet[keyword_alphabet]['_']=true;
// int_alphabet: decimal digits.
for ( c = '0' ; c <= '9' ; c++ ) alphabet[int_alphabet][c]=true;
// float_alphabet: decimal digits plus the decimal point.
for ( c = '0' ; c <= '9' ; c++ ) alphabet[float_alphabet][c]=true;
alphabet[float_alphabet]['.']=true;
// hex_alphabet: hexadecimal digits in either case.
for ( c = '0' ; c <= '9' ; c++ ) alphabet[hex_alphabet][c]=true;
for ( c = 'a' ; c <= 'f' ; c++ ) alphabet[hex_alphabet][c]=true;
for ( c = 'A' ; c <= 'F' ; c++ ) alphabet[hex_alphabet][c]=true;
// x_alphabet: the letter x in either case.
alphabet[x_alphabet]['x']=true;
alphabet[x_alphabet]['X']=true;
// bin_alphabet: the binary digits 0 and 1.
for ( c = '0' ; c <= '1' ; c++ ) alphabet[bin_alphabet][c]=true;
// b_alphabet: the letter b in either case.
alphabet[b_alphabet]['b']=true;
alphabet[b_alphabet]['B']=true;
// id_alphabet: ASCII letters and underscore (no digits).
for ( c = 'a' ; c <= 'z' ; c++ ) alphabet[id_alphabet][c]=true;
for ( c = 'A' ; c <= 'Z' ; c++ ) alphabet[id_alphabet][c]=true;
alphabet[id_alphabet]['_'] = true;
// white_alphabet: space and horizontal tab only; newlines are not included.
alphabet[white_alphabet][' ']=true;
alphabet[white_alphabet]['\t']=true;
} }
/**
 * Reports whether character c is a member of character class a.
 * c is taken as unsigned char so that it can index the lookup table directly;
 * a is not range-checked and must be a table selector below max_alphabet.
 */
bool char_in_alphabet(unsigned char c,Alphabet a)
{
  const bool *members = alphabet[a];
  return members[c];
}
/** /**
* Prints the parsed ast_nodes as a tree * Prints the parsed ast_nodes as a tree
*/ */
@ -185,6 +254,7 @@ namespace chaiscript
/** /**
* Does ranged char check * Does ranged char check
*/ */
/*
inline bool char_between(char t_start, char t_end) { inline bool char_between(char t_start, char t_end) {
if ((*m_input_pos >= t_start) && (*m_input_pos <= t_end)) { if ((*m_input_pos >= t_start) && (*m_input_pos <= t_end)) {
return true; return true;
@ -193,7 +263,7 @@ namespace chaiscript
return false; return false;
} }
} }
*/
/** /**
* Check to see if there is more text parse * Check to see if there is more text parse
*/ */
@ -245,7 +315,7 @@ namespace chaiscript
bool SkipWS() { bool SkipWS() {
bool retval = false; bool retval = false;
while (has_more_input()) { while (has_more_input()) {
if ((*m_input_pos == ' ') || (*m_input_pos == '\t')) { if ( char_in_alphabet(*m_input_pos,white_alphabet) ) { // (*m_input_pos == ' ') || (*m_input_pos == '\t')) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
retval = true; retval = true;
@ -267,17 +337,17 @@ namespace chaiscript
bool retval = false; bool retval = false;
std::string::const_iterator start = m_input_pos; std::string::const_iterator start = m_input_pos;
if (has_more_input() && (char_between('0', '9') || (*m_input_pos == '.'))) { if (has_more_input() && char_in_alphabet(*m_input_pos,float_alphabet) ) { // (char_between('0', '9') || (*m_input_pos == '.'))) {
while (has_more_input() && char_between('0', '9')) { while (has_more_input() && char_in_alphabet(*m_input_pos,int_alphabet) ) { // char_between('0', '9')) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
} }
if (has_more_input() && (*m_input_pos == '.')) { if (has_more_input() && (*m_input_pos == '.')) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
if (has_more_input() && char_between('0', '9')) { if (has_more_input() && char_in_alphabet(*m_input_pos,int_alphabet)) { // char_between('0', '9')) {
retval = true; retval = true;
while (has_more_input() && char_between('0', '9')) { while (has_more_input() && char_in_alphabet(*m_input_pos,int_alphabet) ) { //char_between('0', '9')) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
} }
@ -300,16 +370,16 @@ namespace chaiscript
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
if (has_more_input() && ((*m_input_pos == 'x') || (*m_input_pos == 'X'))) { if (has_more_input() && char_in_alphabet(*m_input_pos,x_alphabet) ) { // ((*m_input_pos == 'x') || (*m_input_pos == 'X'))) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
if (has_more_input() && (char_between('0', '9') || if (has_more_input() && char_in_alphabet(*m_input_pos,hex_alphabet)) { // (char_between('0', '9') ||
char_between('a', 'f') || // char_between('a', 'f') ||
char_between('A', 'F'))) { // char_between('A', 'F'))) {
retval = true; retval = true;
while (has_more_input() && (char_between('0', '9') || while (has_more_input() && char_in_alphabet(*m_input_pos,hex_alphabet) ) { // (char_between('0', '9') ||
char_between('a', 'f') || // char_between('a', 'f') ||
char_between('A', 'F'))) { // char_between('A', 'F'))) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
} }
@ -337,12 +407,12 @@ namespace chaiscript
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
if (has_more_input() && ((*m_input_pos == 'b') || (*m_input_pos == 'B'))) { if (has_more_input() && char_in_alphabet(*m_input_pos,b_alphabet) ) { // ((*m_input_pos == 'b') || (*m_input_pos == 'B'))) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
if (has_more_input() && char_between('0', '1')) { if (has_more_input() && char_in_alphabet(*m_input_pos,bin_alphabet) ) { // char_between('0', '1')) {
retval = true; retval = true;
while (has_more_input() && char_between('0', '1')) { while (has_more_input() && char_in_alphabet(*m_input_pos,bin_alphabet) ) { // char_between('0', '1')) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
} }
@ -374,7 +444,7 @@ namespace chaiscript
std::string::const_iterator start = m_input_pos; std::string::const_iterator start = m_input_pos;
int prev_col = m_col; int prev_col = m_col;
int prev_line = m_line; int prev_line = m_line;
if (has_more_input() && (char_between('0', '9') || (*m_input_pos == '.')) ) { if (has_more_input() && char_in_alphabet(*m_input_pos,float_alphabet) ) { // (char_between('0', '9') || (*m_input_pos == '.')) ) {
if (Hex_()) { if (Hex_()) {
std::string match(start, m_input_pos); std::string match(start, m_input_pos);
std::stringstream ss(match); std::stringstream ss(match);
@ -442,10 +512,11 @@ namespace chaiscript
*/ */
bool Id_() { bool Id_() {
bool retval = false; bool retval = false;
if (has_more_input() && (char_between('A', 'Z') || (*m_input_pos == '_') || char_between('a', 'z'))) { if (has_more_input() && char_in_alphabet(*m_input_pos,id_alphabet)) { // (char_between('A', 'Z') || (*m_input_pos == '_') || char_between('a', 'z'))) {
retval = true; retval = true;
while (has_more_input() && (char_between('A', 'Z') || (*m_input_pos == '_') || while (has_more_input() && char_in_alphabet(*m_input_pos,keyword_alphabet) ) {
char_between('a', 'z') || char_between('0', '9'))) { // (char_between('A', 'Z') || (*m_input_pos == '_') ||
// char_between('a', 'z') || char_between('0', '9'))) {
++m_input_pos; ++m_input_pos;
++m_col; ++m_col;
} }
@ -888,49 +959,24 @@ namespace chaiscript
*/ */
bool Keyword(const char *t_s, bool t_capture = false) { bool Keyword(const char *t_s, bool t_capture = false) {
SkipWS(); SkipWS();
std::string::const_iterator start = m_input_pos;
if (!t_capture) { int prev_col = m_col;
std::string::const_iterator start = m_input_pos; int prev_line = m_line;
int prev_col = m_col; bool retval = Keyword_(t_s);
int prev_line = m_line; // ignore substring matches
bool retval = Keyword_(t_s); if ( retval && has_more_input() && char_in_alphabet(*m_input_pos,keyword_alphabet) ) {
if (retval) {
//todo: fix this. Hacky workaround for preventing substring matches
if (has_more_input() && (char_between('A', 'Z') || (*m_input_pos == '_') ||
char_between('a', 'z') || char_between('0', '9'))) {
m_input_pos = start; m_input_pos = start;
m_col = prev_col; m_col = prev_col;
m_line = prev_line; m_line = prev_line;
return false; retval = false;
}
return true;
}
else {
return retval;
}
} }
else {
std::string::const_iterator start = m_input_pos; if ( t_capture && retval ) {
int prev_col = m_col;
int prev_line = m_line;
if (Keyword_(t_s)) {
//todo: fix this. Hacky workaround for preventing substring matches
if (has_more_input() && (char_between('A', 'Z') || (*m_input_pos == '_') ||
char_between('a', 'z') || char_between('0', '9'))) {
m_input_pos = start;
m_col = prev_col;
m_line = prev_line;
return false;
}
std::string match(start, m_input_pos); std::string match(start, m_input_pos);
AST_NodePtr t(new Str_AST_Node(match, AST_Node_Type::Str, m_filename, prev_line, prev_col, m_line, m_col)); AST_NodePtr t(new Str_AST_Node(match, AST_Node_Type::Str, m_filename, prev_line, prev_col, m_line, m_col));
m_match_stack.push_back(t); m_match_stack.push_back(t);
return true;
}
else {
return false;
}
} }
return retval;
} }
/** /**
@ -961,55 +1007,25 @@ namespace chaiscript
*/ */
bool Symbol(const char *t_s, bool t_capture = false, bool t_disallow_prevention=false) { bool Symbol(const char *t_s, bool t_capture = false, bool t_disallow_prevention=false) {
SkipWS(); SkipWS();
std::string::const_iterator start = m_input_pos;
int prev_col = m_col;
int prev_line = m_line;
bool retval = Symbol_(t_s);
// ignore substring matches
if (retval && has_more_input() && (t_disallow_prevention == false) && char_in_alphabet(*m_input_pos,symbol_alphabet)) {
m_input_pos = start;
m_col = prev_col;
m_line = prev_line;
retval = false;
}
if (!t_capture) { if ( t_capture && retval ) {
std::string::const_iterator start = m_input_pos; std::string match(start, m_input_pos);
int prev_col = m_col; AST_NodePtr t(new Str_AST_Node(match, AST_Node_Type::Str, m_filename, prev_line, prev_col, m_line, m_col));
int prev_line = m_line; m_match_stack.push_back(t);
bool retval = Symbol_(t_s);
if (retval) {
//todo: fix this. Hacky workaround for preventing substring matches
if (has_more_input() && (t_disallow_prevention == false) &&
((*m_input_pos == '+') || (*m_input_pos == '-') || (*m_input_pos == '*') || (*m_input_pos == '/') ||
(*m_input_pos == '|') || (*m_input_pos == '&') || (*m_input_pos == '^') || (*m_input_pos == '=') ||
(*m_input_pos == '.') || (*m_input_pos == '<') || (*m_input_pos == '>'))) {
m_input_pos = start;
m_col = prev_col;
m_line = prev_line;
return false;
}
return true;
}
else {
return retval;
}
}
else {
std::string::const_iterator start = m_input_pos;
int prev_col = m_col;
int prev_line = m_line;
if (Symbol_(t_s)) {
//todo: fix this. Hacky workaround for preventing substring matches
if (has_more_input() && (t_disallow_prevention == false) &&
((*m_input_pos == '+') || (*m_input_pos == '-') || (*m_input_pos == '*') || (*m_input_pos == '/') ||
(*m_input_pos == '|') || (*m_input_pos == '&') || (*m_input_pos == '^') || (*m_input_pos == '=') ||
(*m_input_pos == '.') || (*m_input_pos == '<') || (*m_input_pos == '>'))) {
m_input_pos = start;
m_col = prev_col;
m_line = prev_line;
return false;
}
else {
std::string match(start, m_input_pos);
AST_NodePtr t(new Str_AST_Node(match, AST_Node_Type::Str, m_filename, prev_line, prev_col, m_line, m_col));
m_match_stack.push_back(t);
return true;
}
}
else {
return false;
}
} }
return retval;
} }
/** /**
@ -1971,7 +1987,6 @@ namespace chaiscript
return false; return false;
} }
} }
}; };
} }

View File

@ -5,18 +5,25 @@
// http://www.chaiscript.com // http://www.chaiscript.com
#include <cstdlib>
#include <iostream> #include <iostream>
#include <list> #include <list>
#define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS
#include <chaiscript/chaiscript.hpp>
#ifdef READLINE_AVAILABLE #ifdef READLINE_AVAILABLE
#include <readline/readline.h> #include <readline/readline.h>
#include <readline/history.h> #include <readline/history.h>
#else
/**
 * Minimal stand-in for readline(), compiled when READLINE_AVAILABLE is not defined.
 * Writes the prompt p to std::cout and reads one line from std::cin.
 * Returns the line as a malloc()-allocated, NUL-terminated string that the caller
 * releases with free(), or NULL when no line could be read (end of input or a
 * stream error) or the allocation failed.
 */
static char* readline(const char* p)
{
  std::string line;
  std::cout << p ;
  // Test the getline() result rather than eof(): a final line without a trailing
  // newline sets eofbit but still carries data that must not be discarded.
  if (!std::getline(std::cin, line)) {
    return NULL;
  }
  // Portable replacement for the MSVC-only _strdup().
  char* copy = static_cast<char*>(std::malloc(line.size() + 1));
  if (copy) {
    line.copy(copy, line.size());
    copy[line.size()] = '\0';
  }
  return copy;
}

/** No-op counterpart of add_history(): nothing is recorded in this fallback. */
static void add_history(const char*){}

/** No-op counterpart of using_history(): there is no history state to set up. */
static void using_history(){}
#endif #endif
#include <chaiscript/chaiscript.hpp>
void print_help() { void print_help() {
std::cout << "ChaiScript evaluator. To evaluate an expression, type it and press <enter>." << std::endl; std::cout << "ChaiScript evaluator. To evaluate an expression, type it and press <enter>." << std::endl;
std::cout << "Additionally, you can inspect the runtime system using:" << std::endl; std::cout << "Additionally, you can inspect the runtime system using:" << std::endl;
@ -24,7 +31,6 @@ void print_help() {
std::cout << " dump_object(x) - dumps information about the given symbol" << std::endl; std::cout << " dump_object(x) - dumps information about the given symbol" << std::endl;
} }
bool throws_exception(const chaiscript::Proxy_Function &f) bool throws_exception(const chaiscript::Proxy_Function &f)
{ {
try { try {
@ -36,112 +42,106 @@ bool throws_exception(const chaiscript::Proxy_Function &f)
return false; return false;
} }
std::string get_next_command() { std::string get_next_command() {
#ifdef READLINE_AVAILABLE std::string retval("quit");
char *input_raw; if ( ! std::cin.eof() ) {
input_raw = readline("eval> "); char *input_raw = readline("eval> ");
add_history(input_raw); if ( input_raw ) {
return std::string(input_raw); add_history(input_raw);
#else retval = input_raw;
std::string retval; ::free(input_raw);
std::cout << "eval> "; if ( retval == "help" ) {
std::getline(std::cin, retval); print_help();
retval="";
}
if ( retval == "quit" || retval == "exit" ) {
retval="exit(0)";
}
}
}
return retval; return retval;
#endif
} }
// We have to wrap exit with our own because Clang has a hard time with // We have to wrap exit with our own because Clang has a hard time with
// function pointers to functions with special attributes (system exit being marked NORETURN) // function pointers to functions with special attributes (system exit being marked NORETURN)
void myexit(int return_val) { void myexit(int return_val) {
std::cout << "thanks for using ChaiScript" << std::endl ;
exit(return_val); exit(return_val);
} }
int main(int argc, char *argv[]) { void interactive(chaiscript::ChaiScript& chai)
std::string input; {
// Read-eval-print loop: fetch a command, evaluate it, print any non-void result.
// Evaluation errors are reported on std::cout and the loop carries on.
using_history();
for (;;) {
  std::string input = get_next_command();
  // get_next_command() turns a typed "quit" or "exit" into "exit(0)", so the bare
  // string "quit" only arrives when no further input could be read. Evaluating it
  // would presumably fail on every pass and spin forever, so stop here instead.
  if (input == "quit") {
    break;
  }
  try {
    // evaluate input
    chaiscript::Boxed_Value val = chai.eval(input);
    //Then, we try to print the result of the evaluation to the user
    if (!val.get_type_info().bare_equal(chaiscript::user_type<void>())) {
      try {
        chaiscript::dispatch(chai.get_eval_engine().get_function("print"), chaiscript::Param_List_Builder() << val);
      }
      catch (...) {} //If we can't, do nothing
    }
  }
  catch (chaiscript::Eval_Error &ee) {
    std::cout << ee.what();
    // Append the position of the innermost call-stack entry when one is available.
    if (ee.call_stack.size() > 0) {
      std::cout << "during evaluation at (" << ee.call_stack[0]->start.line << ", " << ee.call_stack[0]->start.column << ")";
    }
    std::cout << std::endl;
  }
  catch (std::exception &e) {
    std::cout << e.what();
    std::cout << std::endl;
  }
}
}
int main(int argc, char *argv[])
{
int error = EXIT_SUCCESS;
std::vector<std::string> usepaths; std::vector<std::string> usepaths;
std::vector<std::string> modulepaths; std::vector<std::string> modulepaths;
// Disable deprecation warning for getenv call. // Disable deprecation warning for getenv call.
#ifdef BOOST_MSVC #ifdef BOOST_MSVC
#pragma warning(push) #pragma warning(push)
#pragma warning(disable : 4996) #pragma warning(disable : 4996)
#endif #endif
const char *usepath = getenv("CHAI_USE_PATH"); const char *usepath = getenv("CHAI_USE_PATH");
const char *modulepath = getenv("CHAI_MODULE_PATH"); const char *modulepath = getenv("CHAI_MODULE_PATH");
#ifdef BOOST_MSVC #ifdef BOOST_MSVC
#pragma warning(pop) #pragma warning(pop)
#endif #endif
usepaths.push_back(""); usepaths.push_back("");
if (usepath) if (usepath)
{ {
usepaths.push_back(usepath); usepaths.push_back(usepath);
} }
modulepaths.push_back(""); modulepaths.push_back("");
if (modulepath) if (modulepath)
{ {
modulepaths.push_back(modulepath); modulepaths.push_back(modulepath);
} }
chaiscript::ChaiScript chai(modulepaths,usepaths); chaiscript::ChaiScript chai(modulepaths,usepaths);
chai.add(chaiscript::fun(&myexit), "exit"); chai.add(chaiscript::fun(&myexit), "exit");
chai.add(chaiscript::fun(&throws_exception), "throws_exception"); chai.add(chaiscript::fun(&throws_exception), "throws_exception");
if (argc < 2) { if (argc < 2) {
#ifdef READLINE_AVAILABLE interactive(chai);
using_history(); } else {
#endif for (int i = 1; !error && (i < argc); ++i) {
input = get_next_command();
while (input != "quit") {
chaiscript::Boxed_Value val;
if (input == "help") {
print_help();
}
else {
try {
//First, we evaluate it
val = chai.eval(input);
//Then, we try to print the result of the evaluation to the user
if (!val.get_type_info().bare_equal(chaiscript::user_type<void>())) {
try {
chaiscript::dispatch(chai.get_eval_engine().get_function("print"), chaiscript::Param_List_Builder() << val);
}
catch (...) {
//If we can't, do nothing
}
}
}
catch (chaiscript::Eval_Error &ee) {
std::cout << ee.what();
if (ee.call_stack.size() > 0) {
std::cout << "during evaluation at (" << ee.call_stack[0]->start.line << ", " << ee.call_stack[0]->start.column << ")";
}
std::cout << std::endl;
}
catch (std::exception &e) {
std::cout << e.what();
std::cout << std::endl;
}
}
input = get_next_command();
}
}
else {
for (int i = 1; i < argc; ++i) {
try { try {
chaiscript::Boxed_Value val = chai.eval_file(argv[i]); chaiscript::Boxed_Value val = chai.eval_file(argv[i]);
} }
@ -155,15 +155,15 @@ int main(int argc, char *argv[]) {
} }
} }
std::cout << std::endl; std::cout << std::endl;
return EXIT_FAILURE; error = EXIT_FAILURE;
} }
catch (std::exception &e) { catch (std::exception &e) {
std::cout << e.what() << std::endl; std::cout << e.what() << std::endl;
return EXIT_FAILURE; error = EXIT_FAILURE;
} }
} }
} }
return EXIT_SUCCESS; return error ;
} }