From 7379eaf180c0dc7ccaf0ff11de7874fd7719782a Mon Sep 17 00:00:00 2001 From: zawz Date: Wed, 19 May 2021 16:44:57 +0200 Subject: [PATCH] internal rework: implement parsing context --- Makefile | 1 - include/exec.hpp | 6 +- include/parse.hpp | 81 ++- include/resolve.hpp | 8 +- include/struc.hpp | 16 +- src/debashify.cpp | 4 +- src/exec.cpp | 121 ++-- src/generate.cpp | 2 +- src/main.cpp | 26 +- src/parse.cpp | 1576 +++++++++++++++++++++++++----------------- src/processing.cpp | 5 +- src/resolve.cpp | 90 ++- src/struc_helper.cpp | 14 +- 13 files changed, 1160 insertions(+), 790 deletions(-) diff --git a/Makefile b/Makefile index 992976b..2fc0e41 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,6 @@ CC=g++ CXXFLAGS= -I$(IDIR) -Wall -pedantic -std=c++20 ifeq ($(DEBUG),true) # debugging flags - CC=clang++ CXXFLAGS += -g -D NO_PARSE_CATCH else # release flags diff --git a/include/exec.hpp b/include/exec.hpp index 109f2b3..b848675 100644 --- a/include/exec.hpp +++ b/include/exec.hpp @@ -2,11 +2,11 @@ #define EXEC_HPP #include "options.hpp" +#include "parse.hpp" -void parse_exec(FILE* fd, const char* in, uint32_t size, std::string const& filename=""); -inline void parse_exec(FILE* fd, std::string const& in, std::string const& filename="") { parse_exec(fd, in.c_str(), in.size(), filename); } +void parse_exec(FILE* fd, parse_context ct); -int exec_process(std::string const& runtime, std::vector const& args, std::string const& filecontents, std::string const& file); +int exec_process(std::string const& runtime, std::vector const& args, parse_context ct); #endif //EXEC_HPP diff --git a/include/parse.hpp b/include/parse.hpp index e9eda8c..6a46538 100644 --- a/include/parse.hpp +++ b/include/parse.hpp @@ -25,59 +25,84 @@ #define ARRAY_ARG_END " \t\n;#()&|<>]" // macros -#define PARSE_ERROR(str, i) format_error(str, "", in, i) +// #define PARSE_ERROR_I(str, ctx, i) format_error(str, ctx.filename, ctx.data, i) +// #define PARSE_ERROR(str, ctx) format_error(str, ctx.filename, ctx.data, ctx.i) +// #define PARSE_ERROR_I(str, ctx, i) { printFormatError(format_error(str, ctx.filename, ctx.data, i)); ctx.has_errored=true; } +// #define PARSE_ERROR(str, ctx) { printFormatError(format_error(str, ctx.filename, ctx.data, ctx.i)); ctx.has_errored=true; } +// structs + +void parse_error(std::string const& message, parse_context& ctx); + +void parse_error(std::string const& message, parse_context& ctx, uint64_t i); // globals -extern bool g_bash; - extern const std::vector posix_cmdvar; extern const std::vector bash_cmdvar; std::string import_file(std::string const& path); -shmain* parse_text(const char* in, uint32_t size, std::string const& filename=""); -inline shmain* parse_text(std::string const& in, std::string const& filename="") { return parse_text(in.c_str(), in.size(), filename); } -inline shmain* parse(std::string const& file) { return parse_text(import_file(file), file); } +std::pair parse_text(parse_context context); +std::pair parse_text(std::string const& in, std::string const& filename=""); +inline std::pair parse(std::string const& file) { return parse_text(import_file(file), file); } + +// tools +parse_context make_context(std::string const& in, std::string const& filename="", bool bash=false); +parse_context make_context(parse_context ctx, std::string const& in="", std::string const& filename="", bool bash=false); +parse_context make_context(parse_context ctx, uint64_t i); +parse_context operator+(parse_context ctx, int64_t a); +parse_context operator-(parse_context ctx, int64_t a); // ** unit parsers ** // /* util parsers */ -bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL); -std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set); +uint32_t word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL); +inline bool word_eq(const char* word, parse_context const& ct, const char* end_set=NULL) { + return word_eq(word, ct.data, ct.size, ct.i, end_set); +} +std::pair get_word(parse_context ct, const char* end_set); uint32_t skip_chars(const char* in, uint32_t size, uint32_t start, const char* set); +inline uint32_t skip_chars(parse_context const& ct, const char* set) { + return skip_chars(ct.data, ct.size, ct.i, set); +} uint32_t skip_until(const char* in, uint32_t size, uint32_t start, const char* set); +inline uint32_t skip_until(parse_context const& ct, const char* set) { + return skip_until(ct.data, ct.size, ct.i, set); +} uint32_t skip_unread(const char* in, uint32_t size, uint32_t start); +inline uint32_t skip_unread(parse_context const& ct) { + return skip_unread(ct.data, ct.size, ct.i); +} // list -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c, const char* expecting=NULL); -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word); -std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words, const char* expecting=NULL); +std::pair parse_list_until(parse_context ct, char end_c, const char* expecting=NULL); +std::pair parse_list_until(parse_context ct, std::string const& end_word); +std::tuple parse_list_until(parse_context ct, std::vector const& end_words, const char* expecting=NULL); // name -std::pair parse_var(const char* in, uint32_t size, uint32_t start, bool specialvars=true, bool array=false); +std::pair parse_var(parse_context ct, bool specialvars=true, bool array=false); // subarg parsers -std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start); -std::pair parse_manipulation(const char* in, uint32_t size, uint32_t start); +std::pair parse_arithmetic(parse_context ct); +std::pair parse_manipulation(parse_context ct); // arg parser -std::pair parse_arg(const char* in, uint32_t size, uint32_t start, const char* end=ARG_END, const char* unexpected=SPECIAL_TOKENS, bool doquote=true); +std::pair parse_arg(parse_context ct, const char* end=ARG_END, const char* unexpected=SPECIAL_TOKENS, bool doquote=true); // redirect parser -std::pair parse_redirect(const char* in, uint32_t size, uint32_t start); +std::pair parse_redirect(parse_context ct); // arglist parser -std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false, std::vector* redirs=nullptr); +std::pair parse_arglist(parse_context ct, bool hard_error=false, std::vector* redirs=nullptr); // block parsers -std::pair parse_block(const char* in, uint32_t size, uint32_t start); -std::pair parse_cmd(const char* in, uint32_t size, uint32_t start); -std::pair parse_function(const char* in, uint32_t size, uint32_t start, const char* after="()"); -std::pair parse_subshell(const char* in, uint32_t size, uint32_t start); -std::pair parse_brace(const char* in, uint32_t size, uint32_t start); -std::pair parse_case(const char* in, uint32_t size, uint32_t start); -std::pair parse_if(const char* in, uint32_t size, uint32_t start); -std::pair parse_for(const char* in, uint32_t size, uint32_t start); -std::pair parse_while(const char* in, uint32_t size, uint32_t start); +std::pair parse_block(parse_context ct); +std::pair parse_cmd(parse_context ct); +std::pair parse_function(parse_context ct, const char* after="()"); +std::pair parse_subshell(parse_context ct); +std::pair parse_brace(parse_context ct); +std::pair parse_case(parse_context ct); +std::pair parse_if(parse_context ct); +std::pair parse_for(parse_context ct); +std::pair parse_while(parse_context ct); // pipeline parser -std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start); +std::pair parse_pipeline(parse_context ct); // condlist parser -std::pair parse_condlist(const char* in, uint32_t size, uint32_t start); +std::pair parse_condlist(parse_context ct); #endif //PARSE_HPP diff --git a/include/resolve.hpp b/include/resolve.hpp index d93a3e6..360506a 100644 --- a/include/resolve.hpp +++ b/include/resolve.hpp @@ -2,16 +2,16 @@ #define RESOLVE_HPP #include "struc.hpp" +#include "parse.hpp" extern std::vector included; -std::vector> do_include_raw(condlist* cmd, std::string const& filename, std::string* ex_dir=nullptr); -std::pair do_resolve_raw(condlist* cmd, std::string const& filename, std::string* ex_dir=nullptr); +std::vector> do_include_raw(condlist* cmd, parse_context ctx, std::string* ex_dir=nullptr); +std::pair do_resolve_raw(condlist* cmd, parse_context ctx, std::string* ex_dir=nullptr); bool add_include(std::string const& file); -void resolve(_obj* sh, std::string* filename); -void resolve(shmain* sh); +void resolve(_obj* sh, parse_context ctx); std::string _pre_cd(std::string const& filename); void _cd(std::string const& dir); diff --git a/include/struc.hpp b/include/struc.hpp index a7b56f5..8669fa1 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -62,13 +62,26 @@ subarg: can be one of */ +struct parse_context { + const char* data=NULL; + uint64_t size=0; + uint64_t i=0; + const char* filename=""; + bool bash=false; + const char* expecting=""; + const char* here_delimiter=""; + const char* here_doc=""; + const char operator[](uint64_t a) { return data[a]; } + bool has_errored=false; +}; + // exceptions class format_error : public std::exception { public: //! @brief Conctructor inline format_error(const std::string& what, const std::string& origin, const std::string& data, int where) { desc=what; index=where; filename=origin; sdat=data; } - + inline format_error(const std::string& what, parse_context const& ctx) { desc=what; index=ctx.i; filename=ctx.filename; sdat=ctx.data; } //! @brief Error message inline const char * what () const throw () {return desc.c_str();} //! @brief Origin of the data, name of imported file, otherwise empty if generated @@ -84,6 +97,7 @@ private: std::string sdat; }; + // objects diff --git a/src/debashify.cpp b/src/debashify.cpp index 63c1961..fed43cc 100644 --- a/src/debashify.cpp +++ b/src/debashify.cpp @@ -473,7 +473,7 @@ bool debashify_array_set(cmd* in, debashify_params* params) gen=gen.substr(2); gen.pop_back(); // create cmd out of arguments - arglist* args = parse_arglist( gen.c_str(), gen.size(), 0 ).first; + arglist* args = parse_arglist( make_context(gen) ).first; cmd* c = new cmd(args); // cmd first argument is _lxsh_X_create if(params->arrays[varname]) @@ -569,7 +569,7 @@ bool debashify_array_set(cmd* in, debashify_params* params) gen=gen.substr(3); gen.pop_back(); // create cmd out of arguments - arglist* args = parse_arglist( gen.c_str(), gen.size(), 0 ).first; + arglist* args = parse_arglist( make_context(gen) ).first; cmd* c = new cmd(args); // cmd first argument is _lxsh_array_create if(params->arrays[varname]) diff --git a/src/exec.cpp b/src/exec.cpp index 8ed7f20..53eb6ad 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -18,16 +18,16 @@ #define PIPE_READ 0 #define PIPE_WRITE 1 -std::vector do_include_exec(condlist* cmd, std::string const& filename, FILE* fd) +std::vector do_include_exec(condlist* cmd, parse_context ctx, FILE* fd) { std::vector ret; std::string dir; - auto incs=do_include_raw(cmd, filename, &dir); + auto incs=do_include_raw(cmd, ctx, &dir); for(auto it: incs) { - parse_exec(fd, it.second, it.first); + parse_exec(fd, make_context(ctx, it.second, it.first)); } // cd back _cd(dir); @@ -36,7 +36,7 @@ std::vector do_include_exec(condlist* cmd, std::string const& filenam } // if first is nullptr: is a string -std::vector do_resolve_exec(condlist* cmd, std::string const& filename, FILE* fd) +std::vector do_resolve_exec(condlist* cmd, parse_context ctx, FILE* fd) { std::vector ret; @@ -45,9 +45,9 @@ std::vector do_resolve_exec(condlist* cmd, std::string const& filenam { // get std::string dir; - p=do_resolve_raw(cmd, filename, &dir); + p=do_resolve_raw(cmd, ctx, &dir); // do parse - parse_exec(fd, p.second, filename); + parse_exec(fd, make_context(ctx, p.second, p.first)); // cd back _cd(dir); } @@ -61,7 +61,7 @@ std::vector do_resolve_exec(condlist* cmd, std::string const& filenam // -- OBJECT CALLS -- -bool resolve_condlist_exec(condlist* in, std::string const& filename, FILE* fd) +bool resolve_condlist_exec(condlist* in, parse_context ctx, FILE* fd) { cmd* tc = in->first_cmd(); if(tc == nullptr) @@ -71,23 +71,23 @@ bool resolve_condlist_exec(condlist* in, std::string const& filename, FILE* fd) if(g_include && strcmd == "%include") { - do_include_exec(in, filename, fd); + do_include_exec(in, ctx, fd); return true; } else if(g_resolve && strcmd == "%resolve") { - do_resolve_exec(in, filename, fd); + do_resolve_exec(in, ctx, fd); return true; } return false; } -bool resolve_exec(condlist* in, std::string const& filename, FILE* fd) +bool resolve_exec(condlist* in, parse_context ctx, FILE* fd) { - if(!resolve_condlist_exec(in, filename, fd)) + if(!resolve_condlist_exec(in, ctx, fd)) { - resolve(in, (std::string*) &filename); + resolve(in, ctx); return false; } return true; @@ -138,60 +138,57 @@ std::string random_string() return ret; } -void parse_exec(FILE* fd, const char* in, uint32_t size, std::string const& filename) +void parse_exec(FILE* fd, parse_context ctx) { - uint32_t i=skip_unread(in, size, 0); -#ifndef NO_PARSE_CATCH - try + ctx.i=skip_unread(ctx); + + debashify_params debash_params; + list* t_lst=new list; + if(t_lst == nullptr) + throw std::runtime_error("Alloc error"); + while(ctx.iadd(pp.first); - if(g_resolve || g_include) + parse_list_until(ctx, 0); + throw std::runtime_error("Aborting due to previous errors"); + } + t_lst->add(pp.first); + if(g_resolve || g_include) + { + if(resolve_exec(t_lst->cls[0], ctx, fd)) { - if(resolve_exec(t_lst->cls[0], filename, fd)) - { - t_lst->clear(); - continue; - } - } - if(options["debashify"]) - debashify(t_lst, &debash_params); - - - std::string gen=t_lst->generate(0); - t_lst->clear(); - - fprintf(fd, "%s", gen.c_str()); - - if(i < size) - { - if(in[i] == '#') - ; // skip here - else if(is_in(in[i], COMMAND_SEPARATOR)) - i++; // skip on next char - else if(is_in(in[i], CONTROL_END)) - throw PARSE_ERROR(strf("Unexpected token: '%c'", in[i]), i); - - i = skip_unread(in, size, i); + t_lst->clear(); + continue; } } - delete t_lst; -#ifndef NO_PARSE_CATCH -} - catch(format_error& e) - { - throw format_error(e.what(), filename, in, e.where()); + if(options["debashify"]) + debashify(t_lst, &debash_params); + + + std::string gen=t_lst->generate(0); + t_lst->clear(); + + fprintf(fd, "%s", gen.c_str()); + + if(ctx.i < ctx.size) + { + if(ctx[ctx.i] == '#') + ; // skip here + else if(is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + ctx.i++; // skip on next char + else if(is_in(ctx[ctx.i], CONTROL_END)) + { + format_error(strf("Unexpected token: '%c'", ctx[ctx.i]), ctx); + return; + } + + ctx.i = skip_unread(ctx); + } } -#endif + delete t_lst; } pid_t forkexec(const char* bin, char *const args[]) @@ -230,7 +227,7 @@ int wait_pid(pid_t pid) return WEXITSTATUS(stat); } -int exec_process(std::string const& runtime, std::vector const& args, std::string const& filecontents, std::string const& file) +int exec_process(std::string const& runtime, std::vector const& args, parse_context ctx) { std::vector strargs = split(runtime, " \t"); std::vector runargs; @@ -250,8 +247,6 @@ int exec_process(std::string const& runtime, std::vector const& arg runargs.push_back(NULL); pid_t pid=0; - // std::string test="echo Hello world\nexit 10\n"; - // fprintf(ffd, "%s\n",, test.c_str(), test.size()); FILE* ffd=0; try { @@ -264,7 +259,7 @@ int exec_process(std::string const& runtime, std::vector const& arg } for(auto it: lxsh_extend_fcts) fprintf(ffd, "%s\n", it.second.code); - parse_exec(ffd, filecontents, file); + parse_exec(ffd, ctx); } catch(std::runtime_error& e) { diff --git a/src/generate.cpp b/src/generate.cpp index f962bc7..6139c15 100644 --- a/src/generate.cpp +++ b/src/generate.cpp @@ -300,7 +300,7 @@ std::string case_block::generate(int ind) // replace ;; from last case with ; if(this->cases.size()>0 && opt_minify) { - ret.erase(ret.size()-1, 2); + ret.pop_back(); } // close case diff --git a/src/main.cpp b/src/main.cpp index a8f9385..706374c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -79,6 +79,9 @@ int main(int argc, char* argv[]) // do parsing bool shebang_is_bin=false; + bool parse_bash=false; + parse_context ctx; + std::string binshebang; for(uint32_t i=0 ; ishebang="#!/bin/sh"; /* mid processing */ // resolve/include if(g_include || g_resolve) - resolve(tsh); + resolve(tsh, ctx); // concatenate to main sh->concat(tsh); diff --git a/src/parse.cpp b/src/parse.cpp index bb8e7ef..16948a2 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -11,8 +11,6 @@ #define ORIGIN_NONE "" -bool g_bash=false; - // macro // constants @@ -27,7 +25,32 @@ const std::vector out_reserved_words = { "then", "else", "fi", "esa // stuff -std::string g_expecting; + +std::string unexpected_token(char c) +{ + std::string print; + print += c; + if(c == '\n') + print="\\n"; + return "Unexpected token '"+print+"'"; +} + +std::string unexpected_token(std::string const& s) +{ + return "Unexpected token '"+s+"'"; +} + +void parse_error(std::string const& message, parse_context& ctx) +{ + printFormatError(format_error(message, ctx.filename, ctx.data, ctx.i)); + ctx.has_errored=true; +} + +void parse_error(std::string const& message, parse_context& ctx, uint64_t i) +{ + printFormatError(format_error(message, ctx.filename, ctx.data, i)); + ctx.has_errored=true; +} std::string expecting(std::string const& word) { @@ -37,6 +60,14 @@ std::string expecting(std::string const& word) return ""; } +std::string expecting(const char* word) +{ + if(word == NULL) + return expecting(std::string()); + else + return expecting(std::string(word)); +} + // basic char utils bool has_common_char(const char* str1, const char* str2) @@ -63,29 +94,44 @@ bool valid_name(std::string const& str) // string utils -bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set) +parse_context make_context(std::string const& in, std::string const& filename, bool bash) { - uint32_t wordsize=strlen(word); - if(wordsize > size-start) - return false; - if(strncmp(word, in+start, wordsize) == 0) - { - if(end_set==NULL) - return true; - // end set - if(wordsize < size-start) - return is_in(in[start+wordsize], end_set); - } - return false; + parse_context ctx = { .data=in.c_str(), .size=in.size(), .filename=filename.c_str(), .bash=bash}; + return ctx; } -std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) +parse_context make_context(parse_context ctx, std::string const& in, std::string const& filename, bool bash) { - uint32_t i=start; - while(i parse_var(const char* in, uint32_t size, uint32_t start, bool specialvars, bool array) +uint32_t word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set) { uint32_t i=start; + uint32_t wordsize=strlen(word); + if(wordsize > size-i) + return false; + if(strncmp(word, in+i, wordsize) == 0) + { + if(end_set==NULL) + return true; + // end set + if(wordsize < size-i) + return is_in(in[i+wordsize], end_set); + } + return false; +} + +std::pair get_word(parse_context ctx, const char* end_set) +{ + uint32_t start=ctx.i; + while(ctx.i parse_var(parse_context ctx, bool specialvars, bool array) +{ variable* ret=nullptr; std::string varname; + uint32_t start=ctx.i; // special vars - if(specialvars && (is_in(in[i], SPECIAL_VARS) || (in[i]>='0' && in[i]<='1')) ) + if(specialvars && (is_in(ctx[ctx.i], SPECIAL_VARS) || (ctx[ctx.i]>='0' && ctx[ctx.i]<='9')) ) { - varname=in[i]; - i++; + varname=ctx[ctx.i]; + ctx.i++; } else // varname { - while(iindex=pp.first; - i = pp.second; - if(in[i] != ']') - throw PARSE_ERROR( "Expecting ']'", i ); - i++; + ctx = pp.second; + if(ctx[ctx.i] != ']') + { + parse_error( "Expecting ']'", ctx ); + return std::make_pair(ret, ctx); + } + ctx.i++; } } - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair get_operator(const char* in, uint32_t size, uint32_t start) +std::pair get_operator(parse_context ctx) { - uint32_t i=start; std::string ret; + uint32_t start=ctx.i; - while(!is_alphanum(in[i]) && !is_in(in[i], SEPARATORS) && in[i]!=')' ) - i++; + while(!is_alphanum(ctx[ctx.i]) && !is_in(ctx[ctx.i], SEPARATORS) && ctx[ctx.i]!=')' ) + ctx.i++; - ret = std::string(in+start, i-start); + ret = std::string(ctx.data+start, ctx.i-start); - return std::make_pair(ret, i); + return std::make_pair(ret, ctx.i); } +//** HERE **// + // parse an arithmetic // ends at )) // temporary, to improve -std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start) +std::pair parse_arithmetic(parse_context ctx) { arithmetic* ret = nullptr; - uint32_t i=start; #ifndef NO_PARSE_CATCH try { #endif - i = skip_chars(in, size, i, SEPARATORS); - if(i>size || in[i] == ')') - throw PARSE_ERROR( "Unexpected end of arithmetic", i ); +; + ctx.i = skip_chars(ctx, SEPARATORS); + if(ctx.i>ctx.size || ctx[ctx.i] == ')') + { + parse_error( "Unexpected end of arithmetic", ctx ); + return std::make_pair(ret, ctx); + } - auto po = get_operator(in, size, i); + auto po = get_operator(ctx); if(is_among(po.first, arithmetic_precedence_operators)) { - auto pa = parse_arithmetic(in, size, po.second); + ctx.i = po.second; + auto pa = parse_arithmetic(ctx); ret = new operation_arithmetic(po.first, pa.first, nullptr, true); - i=pa.second; + ctx=pa.second; } else { variable_arithmetic* ttvar=nullptr; // for categorizing definitions - if(in[i]=='-' || is_num(in[i])) + if(ctx[ctx.i]=='-' || is_num(ctx[ctx.i])) { - uint32_t j=i; - if(in[i]=='-') - i++; - while(is_num(in[i])) - i++; - ret = new number_arithmetic( std::string(in+j, i-j) ); + uint32_t j=ctx.i; + if(ctx[ctx.i]=='-') + ctx.i++; + while(is_num(ctx[ctx.i])) + ctx.i++; + ret = new number_arithmetic( std::string(ctx.data+j, ctx.i-j) ); } - else if(word_eq("$(", in, size, i)) + else if(word_eq("$(", ctx)) { - auto ps = parse_subshell(in, size, i+2); + ctx.i+=2; + auto ps = parse_subshell(ctx); ret = new subshell_arithmetic(ps.first); - i=ps.second; + ctx=ps.second; } - else if(word_eq("${", in, size, i)) + else if(word_eq("${", ctx)) { - auto pm = parse_manipulation(in, size, i+2); + ctx.i+=2; + auto pm = parse_manipulation(ctx); ret = new variable_arithmetic(pm.first); - i=pm.second; + ctx=pm.second; } - else if(in[i] == '(') + else if(ctx[ctx.i] == '(') { - auto pa = parse_arithmetic(in, size, i+1); + ctx.i++; + auto pa = parse_arithmetic(ctx); ret = pa.first; - i = pa.second+1; + ctx = pa.second; + ctx.i++; } else { bool specialvars=false; - if(in[i] == '$') + if(ctx[ctx.i] == '$') { specialvars=true; - i++; + ctx.i++; } - auto pp = parse_var(in, size, i, specialvars, true); + auto pp = parse_var(ctx, specialvars, true); ttvar = new variable_arithmetic(pp.first); ret = ttvar; - i=pp.second; + ctx=pp.second; } - i = skip_chars(in, size, i, SEPARATORS); - auto po = get_operator(in, size, i); + ctx.i = skip_chars(ctx, SEPARATORS); + auto po = get_operator(ctx); if(po.first != "") { if(!is_among(po.first, arithmetic_operators)) - throw PARSE_ERROR( "Unknown arithmetic operator: "+po.first, i); + { + parse_error( "Unknown arithmetic operator: "+po.first, ctx); + } arithmetic* val1 = ret; - auto pa = parse_arithmetic(in, size, po.second); + ctx.i=po.second; + auto pa = parse_arithmetic(ctx); arithmetic* val2 = pa.first; - i = pa.second; + ctx = pa.second; ret = new operation_arithmetic(po.first, val1, val2); - i = skip_chars(in, size, i, SEPARATORS); + ctx.i = skip_chars(ctx, SEPARATORS); } if(po.first == "=" && ttvar!=nullptr) // categorize as var definition ttvar->var->definition=true; - if(i >= size) - throw PARSE_ERROR( "Unexpected end of file, expecting '))'", i ); - if(in[i] != ')') - throw PARSE_ERROR( "Unexpected token, expecting ')'", i); + if(ctx.i >= ctx.size) + { + parse_error( "Unexpected end of file, expecting '))'", ctx ); + return std::make_pair(ret, ctx); + } + if(ctx[ctx.i] != ')') + { + parse_error( unexpected_token(ctx[ctx.i])+ ", expecting ')'", ctx); + return std::make_pair(ret, ctx); + } } #ifndef NO_PARSE_CATCH } @@ -266,217 +363,245 @@ std::pair parse_arithmetic(const char* in, uint32_t size, } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_manipulation(const char* in, uint32_t size, uint32_t start) +std::pair parse_manipulation(parse_context ctx) { variable* ret = nullptr; - uint32_t i=start; arg* precede = nullptr; + uint32_t start=ctx.i; -#ifndef NO_PARSE_CATCH - try + + if(ctx[ctx.i] == '#' || ctx[ctx.i] == '!') { -#endif - ; - if(in[i] == '#' || in[i] == '!') + if(!ctx.bash && ctx[ctx.i] == '!') { - if(!g_bash && in[i] == '!') - throw PARSE_ERROR("bash specific: '${!}'", i); - std::string t; - t+=in[i]; - precede = new arg( t ); - i++; + parse_error("bash specific: '${!}'", ctx); + return std::make_pair(ret, ctx); } - - auto p=parse_var(in, size, i, true, true); - if(p.first == nullptr) - throw PARSE_ERROR( "Bad variable name", i ); - ret = p.first; - i = p.second; - - ret->is_manip=true; - if(precede != nullptr) - { - if(in[i] != '}') - throw PARSE_ERROR( "Incompatible operations", start ); - ret->manip = precede; - ret->precedence=true; - precede=nullptr; - } - else if(in[i] != '}') - { - auto pa = parse_arg(in, size, i, "}", NULL, false); - ret->manip=pa.first; - i = pa.second; - } - i++; - -#ifndef NO_PARSE_CATCH + std::string t; + t+=ctx[ctx.i]; + precede = new arg( t ); + ctx.i++; } - catch(format_error& e) + + auto p=parse_var(ctx, true, true); + if(p.first == nullptr) { - if(ret != nullptr) delete ret; - throw e; + parse_error( "Bad variable name", ctx ); + return std::make_pair(ret, ctx); } -#endif + ret = p.first; + ctx = p.second; - return std::make_pair(ret, i); + ret->is_manip=true; + if(precede != nullptr) + { + if(ctx[ctx.i] != '}') + { + parse_error( "Incompatible operations", ctx, start ); + return std::make_pair(ret, ctx); + } + ret->manip = precede; + ret->precedence=true; + precede=nullptr; + } + else if(ctx[ctx.i] != '}') + { + auto pa = parse_arg(ctx, "}", NULL, false); + ret->manip=pa.first; + ctx = pa.second; + } + ctx.i++; + + return std::make_pair(ret, ctx); } -void do_one_subarg_step(arg* ret, const char* in, uint32_t size, uint32_t& i, uint32_t& j, bool is_quoted) +parse_context do_one_subarg_step(arg* ret, parse_context ctx, uint32_t& j, bool is_quoted) { - if( in[i] == '`' ) + if( ctx[ctx.i] == '`' ) { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); - i++; - uint32_t k=skip_until(in, size, i, "`"); - if(k>=size) - throw PARSE_ERROR("Expecting '`'", i-1); - if(in[k-1] == '\\' && in[k-2] != '\\') - throw PARSE_ERROR("Escaping backticks is not supported", k); + ctx.i++; + uint32_t k=skip_until(ctx, "`"); + if(k>=ctx.size) + { + parse_error("Expecting '`'", ctx, ctx.i-1); + return ctx; + } + if(ctx[k-1] == '\\' && ctx[k-2] != '\\') + { + parse_error("Escaping backticks is not supported", ctx, k); + return make_context(ctx, k); + } // get subshell - auto r=parse_list_until(in, k, i, 0); + parse_context newct = ctx; + ctx.size=k; + auto r=parse_list_until(newct, 0); ret->add(new subshell_subarg(new subshell(r.first), is_quoted)); - j = i = r.second+1; + ctx = r.second; + ctx.i++; + j = ctx.i; } - else if( word_eq("$((", in, size, i) ) // arithmetic operation + else if( word_eq("$((", ctx) ) // arithmetic operation { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // get arithmetic - auto r=parse_arithmetic(in, size, i+3); + ctx.i+=3; + auto r=parse_arithmetic(ctx); arithmetic_subarg* tt = new arithmetic_subarg(r.first); tt->quoted=is_quoted; ret->add(tt); - i = r.second; - if(!word_eq("))", in, size, i)) - throw PARSE_ERROR( "Unexpected token ')', expecting '))'", i); - i+=2; - j=i; + ctx = r.second; + if(ctx.i >= ctx.size) + return ctx; + if(!word_eq("))", ctx)) + { + parse_error( "Unexpected token ')', expecting '))'", ctx); + return ctx+1; + } + ctx.i+=2; + j=ctx.i; } - else if( word_eq("$(", in, size, i) ) // substitution + else if( word_eq("$(", ctx) ) // substitution { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // get subshell - auto r=parse_subshell(in, size, i+2); + ctx.i+=2; + auto r=parse_subshell(ctx); ret->add(new subshell_subarg(r.first, is_quoted)); - j = i = r.second; + ctx = r.second; + j = ctx.i; } - else if( word_eq("${", in, size, i) ) // variable manipulation + else if( word_eq("${", ctx) ) // variable manipulation { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // get manipulation - auto r=parse_manipulation(in, size, i+2); + ctx.i+=2; + auto r=parse_manipulation(ctx); ret->add(new variable_subarg(r.first, is_quoted)); - j = i = r.second; + ctx = r.second; + j = ctx.i; } - else if( in[i] == '$' ) + else if( ctx[ctx.i] == '$' ) { - auto r=parse_var(in, size, i+1); + parse_context newct=ctx; + newct.i++; + auto r=parse_var(newct); if(r.first !=nullptr) { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // add var ret->add(new variable_subarg(r.first, is_quoted)); - j = i = r.second; + ctx = r.second; + j = ctx.i; } else - i++; + ctx.i++; } else - i++; + ctx.i++; + return ctx; } // parse one argument // must start at a read char // ends at either " \t|&;\n()" -std::pair parse_arg(const char* in, uint32_t size, uint32_t start, const char* end, const char* unexpected, bool doquote) +std::pair parse_arg(parse_context ctx, const char* end, const char* unexpected, bool doquote) { arg* ret = new arg; // j : start of subarg , q = start of quote - uint32_t i=start,j=start,q=start; + uint32_t j=ctx.i,q=ctx.i; #ifndef NO_PARSE_CATCH try { #endif ; - if(unexpected != NULL && is_in(in[i], unexpected)) - throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); - - while(i") && in[i+1]=='&') // special case for <& and >& + parse_error( unexpected_token(ctx[ctx.i]) , ctx); + } + + while(ctx.i") && ctx[ctx.i+1]=='&') // special case for <& and >& { - i+=2; + ctx.i += 2; } - else if(doquote && in[i]=='\\') // backslash: don't check next char + else if(doquote && ctx[ctx.i]=='\\') // backslash: don't check next char { - i++; - if(i>=size) + ctx.i++; + if(ctx.i>=ctx.size) break; - if(in[i] == '\n') // \ on \n : skip this char + if(ctx[ctx.i] == '\n') // \ on \n : skip this char { - std::string tmpstr=std::string(in+j, i-1-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-1-j); if(tmpstr!="") ret->add(tmpstr); - i++; - j=i; + ctx.i++; + j=ctx.i; } else - i++; + ctx.i++; } - else if(doquote && in[i] == '"') // start double quote + else if(doquote && ctx[ctx.i] == '"') // start double quote { - q=i; - i++; - while(in[i] != '"') // while inside quoted string + q=ctx.i; + ctx.i++; + while(ctx[ctx.i] != '"') // while inside quoted string { - if(in[i] == '\\') // backslash: don't check next char + if(ctx[ctx.i] == '\\') // backslash: don't check next char { - i+=2; + ctx.i+=2; } else - do_one_subarg_step(ret, in, size, i, j, true); + ctx = do_one_subarg_step(ret, ctx, j, true); - if(i>=size) - throw PARSE_ERROR("Unterminated double quote", q); + if(ctx.i>=ctx.size) + { + parse_error("Unterminated double quote", ctx, q); + return std::make_pair(ret, ctx); + } } - i++; + ctx.i++; } - else if(doquote && in[i] == '\'') // start single quote + else if(doquote && ctx[ctx.i] == '\'') // start single quote { - q=i; - i++; - while(i=size) - throw PARSE_ERROR("Unterminated single quote", q); - i++; + q=ctx.i; + ctx.i++; + while(ctx.i=ctx.size) + { + parse_error("Unterminated single quote", ctx, q); + return std::make_pair(ret, ctx); + } + ctx.i++; } else - do_one_subarg_step(ret, in, size, i, j, false); + ctx = do_one_subarg_step(ret, ctx, j, false); } // add string subarg - std::string val=std::string(in+j, i-j); + std::string val=std::string(ctx.data+j, ctx.i-j); if(val != "") ret->add(val); @@ -489,75 +614,89 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_redirect(const char* in, uint32_t size, uint32_t start) +std::pair parse_redirect(parse_context ctx) { - uint32_t i=start; - bool is_redirect=false; bool needs_arg=false; bool has_num_prefix=false; - if(is_num(in[i])) + uint32_t start=ctx.i; + + if(is_num(ctx[ctx.i])) { - i++; + ctx.i++; has_num_prefix=true; } - if( in[i] == '>' ) + if( ctx[ctx.i] == '>' ) { - i++; - if(i>size) - throw PARSE_ERROR("Unexpected end of file", i); - is_redirect = true; - if(i+1ctx.size) { - i+=2; + parse_error("Unexpected end of file", ctx); + return std::make_pair(nullptr, ctx); + } + is_redirect = true; + if(ctx.i+1& bash operator + else if(ctx[ctx.i] == '&') // >& bash operator { - if(!g_bash) - throw PARSE_ERROR("bash specific: '>&'", i); - i++; + if(!ctx.bash) + { + parse_error("bash specific: '>&'", ctx); + } + ctx.i++; needs_arg=true; } else { - if(in[i] == '>') - i++; + if(ctx[ctx.i] == '>') + ctx.i++; needs_arg=true; } } - else if( in[i] == '<' ) + else if( ctx[ctx.i] == '<' ) { if(has_num_prefix) - throw PARSE_ERROR("Invalid input redirection", i-1); - i++; - if(i>size) - throw PARSE_ERROR("Unexpected end of file", i); - if(in[i] == '<') { - i++; - if(ictx.size) + { + parse_error("Unexpected end of file", ctx); + return std::make_pair(nullptr, ctx); + } + if(ctx[ctx.i] == '<') + { + ctx.i++; + if(ctx.i", in, size, i) ) // &> bash operator + else if( word_eq("&>", ctx) ) // &> bash operator { - if(!g_bash) - throw PARSE_ERROR("bash specific: '&>'", i); - i+=2; - if(i') - i++; + if(!ctx.bash) + { + parse_error("bash specific: '&>'", ctx); + } + ctx.i+=2; + if(ctx.i') + ctx.i++; is_redirect=true; needs_arg=true; } @@ -570,20 +709,23 @@ std::pair parse_redirect(const char* in, uint32_t size, uin try { #endif +; ret = new redirect; - ret->op = std::string(in+start, i-start); + ret->op = std::string(ctx.data+start, ctx.i-start); if(needs_arg) { - i = skip_chars(in, size, i, SPACES); + ctx.i = skip_chars(ctx, SPACES); if(ret->op == "<<") { - auto pa = parse_arg(in, size, i); + auto pa = parse_arg(ctx); std::string delimitor = pa.first->string(); delete pa.first; pa.first = nullptr; if(delimitor == "") - throw PARSE_ERROR("Non-static or empty text input delimitor", i); + { + parse_error("Non-static or empty text input delimitor", ctx); + } if(delimitor.find('"') != std::string::npos || delimitor.find('\'') != std::string::npos || delimitor.find('\\') != std::string::npos) { @@ -591,35 +733,39 @@ std::pair parse_redirect(const char* in, uint32_t size, uin delimitor.pop_back(); // remove \n } - i = skip_chars(in, size, pa.second, SPACES); // skip spaces + ctx = pa.second; + ctx.i = skip_chars(ctx, SPACES); // skip spaces - if(in[i] == '#') // skip comment - i = skip_until(in, size, i, "\n"); //skip to endline - if(in[i] != '\n') // has another arg - throw PARSE_ERROR("Additionnal argument after text input delimitor", i); + if(ctx[ctx.i] == '#') // skip comment + ctx.i = skip_until(ctx, "\n"); //skip to endline + if(ctx[ctx.i] != '\n') // has another arg + { + parse_error("Additionnal argument after text input delimitor", ctx); + return std::make_pair(ret, ctx); + } - i++; - uint32_t j=i; + ctx.i++; + uint32_t j=ctx.i; char* tc=NULL; - tc = (char*) strstr(in+i, std::string("\n"+delimitor+"\n").c_str()); // find delimitor + tc = (char*) strstr(ctx.data+ctx.i, std::string("\n"+delimitor+"\n").c_str()); // find delimitor if(tc!=NULL) // delimitor was found { - i = (tc-in)+delimitor.size()+1; + ctx.i = (tc-ctx.data)+delimitor.size()+1; } else { - i = size; + ctx.i = ctx.size; } - std::string tmpparse=std::string(in+j, i-j); - auto pval = parse_arg(tmpparse.c_str(), tmpparse.size(), 0, NULL); + std::string tmpparse=std::string(ctx.data+j, ctx.i-j); + auto pval = parse_arg({ .data=tmpparse.c_str(), .size=tmpparse.size(), .i=0, .bash=ctx.bash}, NULL); ret->target = pval.first; ret->target->insert(0, delimitor+"\n"); } else { - auto pa = parse_arg(in, size, i); + auto pa = parse_arg(ctx); ret->target = pa.first; - i=pa.second; + ctx=pa.second; } } #ifndef NO_PARSE_CATCH @@ -631,19 +777,21 @@ std::pair parse_redirect(const char* in, uint32_t size, uin throw e; } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } else - return std::make_pair(nullptr, start); + { + ctx.i=start; + return std::make_pair(nullptr, ctx); + } } // parse one list of arguments (a command for instance) // must start at a read char // first char has to be read // ends at either &|;\n#() -std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error, std::vector* redirs) +std::pair parse_arglist(parse_context ctx, bool hard_error, std::vector* redirs) { - uint32_t i=start; arglist* ret = nullptr; #ifndef NO_PARSE_CATCH @@ -651,58 +799,74 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 { #endif ; - if(word_eq("[[", in, size, i, ARG_END) ) // [[ bash specific parsing + if(word_eq("[[", ctx, ARG_END) ) // [[ bash specific parsing { - if(!g_bash) - throw PARSE_ERROR("bash specific: '[['", i); + if(!ctx.bash) + { + parse_error("bash specific: '[['", ctx); + } while(true) { if(ret == nullptr) ret = new arglist; - auto pp=parse_arg(in, size, i, SEPARATORS, NULL); + auto pp=parse_arg(ctx, SEPARATORS, NULL); ret->add(pp.first); - i = pp.second; - i = skip_chars(in, size, i, SEPARATORS); - if(word_eq("]]", in, size, i, ARG_END)) + ctx = pp.second; + ctx.i = skip_chars(ctx, SEPARATORS); + if(word_eq("]]", ctx, ARG_END)) { ret->add(new arg("]]")); - i = skip_chars(in, size, i+2, SPACES); - if( !is_in(in[i], ARGLIST_END) ) - throw PARSE_ERROR("Unexpected argument after ']]'", i); + ctx.i+=2; + ctx.i = skip_chars(ctx, SPACES); + if( !is_in(ctx[ctx.i], ARGLIST_END) ) + { + parse_error("Unexpected argument after ']]'", ctx); + ctx = parse_arglist(ctx).second; + } break; } - if(i>=size) - throw PARSE_ERROR( "Expecting ']]'", i); + if(ctx.i>=ctx.size) + { + parse_error( "Expecting ']]'", ctx); + return std::make_pair(ret, ctx); + } } } - else if(is_in(in[i], SPECIAL_TOKENS) && !word_eq("&>", in, size, i)) + else if(is_in(ctx[ctx.i], SPECIAL_TOKENS) && !word_eq("&>", ctx)) { if(hard_error) - throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); + { + parse_error( unexpected_token(ctx[ctx.i]) , ctx); + } else - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } + // ** HERE ** else { - while(i') && in[i+1] == '(' ) // bash specific <() + if(ctx.i+1 < ctx.size && (ctx[ctx.i] == '<' || ctx[ctx.i] == '>') && ctx[ctx.i+1] == '(' ) // bash specific <() { - bool is_output = in[i] == '>'; - i+=2; + if(!ctx.bash) + { + parse_error(strf("bash specific: %c()", ctx[ctx.i]), ctx); + } + bool is_output = ctx[ctx.i] == '>'; + ctx.i+=2; if(ret == nullptr) ret = new arglist; - auto ps = parse_subshell(in, size, i); + auto ps = parse_subshell(ctx); ret->add(new arg(new procsub_subarg(is_output, ps.first))); - i=ps.second; + ctx=ps.second; } else if(redirs!=nullptr) { - auto pr = parse_redirect(in, size, i); + auto pr = parse_redirect(ctx); if(pr.first != nullptr) { redirs->push_back(pr.first); - i=pr.second; + ctx=pr.second; } else goto argparse; @@ -712,19 +876,22 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 argparse: if(ret == nullptr) ret = new arglist; - auto pp=parse_arg(in, size, i); + auto pp=parse_arg(ctx); ret->add(pp.first); - i = pp.second; + ctx = pp.second; } - i = skip_chars(in, size, i, SPACES); - if(word_eq("&>", in, size, i)) + ctx.i = skip_chars(ctx, SPACES); + if(word_eq("&>", ctx)) continue; // &> has to be managed in redirects - if(word_eq("|&", in, size, i)) - throw PARSE_ERROR("Unsupported '|&', use '2>&1 |' instead", i); - if(i>=size) - return std::make_pair(ret, i); - if( is_in(in[i], SPECIAL_TOKENS) ) - return std::make_pair(ret, i); + if(word_eq("|&", ctx)) + { + parse_error("Unsupported '|&', use '2>&1 |' instead", ctx); + return std::make_pair(ret, ctx+1); + } + if(ctx.i>=ctx.size) + return std::make_pair(ret, ctx); + if( is_in(ctx[ctx.i], SPECIAL_TOKENS) ) + return std::make_pair(ret, ctx); } } @@ -739,37 +906,42 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 throw e; } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse a pipeline // must start at a read char // separated by | // ends at either &;\n#) -std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start) +std::pair parse_pipeline(parse_context ctx) { - uint32_t i=start; pipeline* ret = new pipeline; #ifndef NO_PARSE_CATCH try { #endif - if(in[i] == '!' && i+1negated = true; - i=skip_chars(in, size, i+1, SPACES); + ctx.i++; + ctx.i=skip_chars(ctx, SPACES); } - while(iadd(pp.first); - i = skip_chars(in, size, pp.second, SPACES); - if( i>=size || is_in(in[i], PIPELINE_END) || word_eq("||", in, size, i) ) - return std::make_pair(ret, i); - else if( in[i] != '|' ) - throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i] ), i); - i++; + ctx = pp.second; + ctx.i = skip_chars(ctx, SPACES); + if( ctx.i>=ctx.size || is_in(ctx[ctx.i], PIPELINE_END) || word_eq("||", ctx) ) + return std::make_pair(ret, ctx); + else if( ctx[ctx.i] != '|' ) + { + parse_error( unexpected_token(ctx[ctx.i] ), ctx); + return std::make_pair(ret, ctx); + } + ctx.i++; } #ifndef NO_PARSE_CATCH } @@ -779,53 +951,60 @@ std::pair parse_pipeline(const char* in, uint32_t size, uin throw e; } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse condition lists // must start at a read char // separated by && or || // ends at either ;\n)# -std::pair parse_condlist(const char* in, uint32_t size, uint32_t start) +std::pair parse_condlist(parse_context ctx) { - uint32_t i = skip_unread(in, size, start); condlist* ret = new condlist; + ctx.i = skip_unread(ctx); #ifndef NO_PARSE_CATCH try { #endif +; bool optype=AND_OP; - while(iadd(pp.first, optype); - i = pp.second; - if(i>=size || is_in(in[i], CONTROL_END) || is_in(in[i], COMMAND_SEPARATOR)) // end here exactly: used for control later + ctx = pp.second; + if(ctx.i>=ctx.size || is_in(ctx[ctx.i], CONTROL_END) || is_in(ctx[ctx.i], COMMAND_SEPARATOR)) // end here exactly: used for control later { - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } - else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after + else if( word_eq("&", ctx) && !word_eq("&&", ctx) ) // parallel: end one char after { ret->parallel=true; - i++; - return std::make_pair(ret, i); + ctx.i++; + return std::make_pair(ret, ctx); } - else if( word_eq("&&", in, size, i) ) // and op + else if( word_eq("&&", ctx) ) // and op { - i += 2; + ctx.i += 2; optype=AND_OP; } - else if( word_eq("||", in, size, i) ) // or op + else if( word_eq("||", ctx) ) // or op { - i += 2; + ctx.i += 2; optype=OR_OP; } else - throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i]), i); - i = skip_unread(in, size, i); - if(i>=size) - throw PARSE_ERROR( "Unexpected end of file", i ); + { + parse_error( unexpected_token(ctx[ctx.i]), ctx); + return std::make_pair(ret, ctx); + } + ctx.i = skip_unread(ctx); + if(ctx.i>=ctx.size) + { + parse_error( "Unexpected end of file", ctx ); + return std::make_pair(ret, ctx); + } } #ifndef NO_PARSE_CATCH } @@ -835,51 +1014,58 @@ std::pair parse_condlist(const char* in, uint32_t size, uin throw e; } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c, const char* expecting) +std::pair parse_list_until(parse_context ctx, char end_c, const char* expecting) { list* ret = new list; - uint32_t i=skip_unread(in, size, start); + ctx.i=skip_unread(ctx); #ifndef NO_PARSE_CATCH try { #endif - while(in[i] != end_c) +; + const char* old_expect=ctx.expecting; + if(ctx.expecting!=NULL) + ctx.expecting = expecting; + + while(ctx[ctx.i] != end_c) { - auto pp=parse_condlist(in, size, i); + auto pp=parse_condlist(ctx); ret->add(pp.first); - i=pp.second; + ctx=pp.second; - if(i < size) + if(ctx.i < ctx.size) { - if(in[i] == end_c) // end char, stop here + if(ctx[ctx.i] == end_c) // end char, stop here break; - else if(in[i] == '#') + else if(ctx[ctx.i] == '#') ; // skip here - else if(is_in(in[i], COMMAND_SEPARATOR)) - i++; // skip on next char - else if(is_in(in[i], CONTROL_END)) - throw PARSE_ERROR(strf("Unexpected token: '%c'", in[i]), i); + else if(is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + ctx.i++; // skip on next char + else if(is_in(ctx[ctx.i], CONTROL_END)) + { + parse_error( unexpected_token(ctx[ctx.i]), ctx); + return std::make_pair(ret, ctx); + } - i = skip_unread(in, size, i); + ctx.i = skip_unread(ctx); } - if(i>=size) + if(ctx.i>=ctx.size) { if(end_c != 0) { - if(expecting!=NULL) - throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); - else - throw PARSE_ERROR(strf("Expecting '%c'", end_c), start-1); + parse_error(strf("Expecting '%s'", expecting), ctx); + return std::make_pair(ret, ctx); } else break; } } + ctx.expecting = old_expect; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -888,94 +1074,107 @@ std::pair parse_list_until(const char* in, uint32_t size, uint3 throw e; } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word) +std::pair parse_list_until(parse_context ctx, std::string const& end_word) { list* ret = new list; - uint32_t i=skip_unread(in, size, start); - -#ifndef NO_PARSE_CATCH - try - { -#endif - std::string old_expect=g_expecting; - g_expecting=end_word; - while(true) - { - // check word - auto wp=get_word(in, size, i, ARG_END); - if(wp.first == end_word) - { - i=wp.second; - break; - } - // do a parse - auto pp=parse_condlist(in, size, i); - ret->add(pp.first); - i=pp.second; - if(i=size) - { - throw PARSE_ERROR(strf("Expecting '%s'", end_word.c_str()), start-1); - } - } - g_expecting=old_expect; -#ifndef NO_PARSE_CATCH - } - catch(format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); -} - - -std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words, const char* expecting) -{ - list* ret = new list; - uint32_t i=skip_unread(in, size, start);; + ctx.i=skip_unread(ctx); std::string found_end_word; #ifndef NO_PARSE_CATCH try { #endif - std::string old_expect=g_expecting; - g_expecting=end_words[0]; +; + const char* old_expect=ctx.expecting; + + ctx.expecting=end_word.c_str(); + + while(true) + { + // check word + auto wp=get_word(ctx, ARG_END); + if(wp.first == end_word) + { + ctx.i=wp.second; + break; + } + // do a parse + auto pp=parse_condlist(ctx); + ret->add(pp.first); + ctx=pp.second; + if(ctx.i=ctx.size) + { + parse_error(strf("Expecting '%s'", ctx.expecting), ctx); + return std::make_pair(ret, ctx); + } + } + ctx.expecting=old_expect; +#ifndef NO_PARSE_CATCH + } + catch(format_error& e) + { + delete ret; + throw e; + } +#endif + return std::make_pair(ret, ctx); +} + + +std::tuple parse_list_until(parse_context ctx, std::vector const& end_words, const char* expecting) +{ + list* ret = new list; + ctx.i=skip_unread(ctx); + std::string found_end_word; + +#ifndef NO_PARSE_CATCH + try + { +#endif +; + const char* old_expect=ctx.expecting; + + if(expecting!=NULL) + ctx.expecting=expecting; + else + ctx.expecting=end_words[0].c_str(); + bool stop=false; while(true) { // check words - auto wp=get_word(in, size, i, ARG_END); + auto wp=get_word(ctx, ARG_END); for(auto it: end_words) { - if(it == ";" && in[i] == ';') + if(it == ";" && ctx[ctx.i] == ';') { found_end_word=";"; - i++; + ctx.i++; stop=true; break; } if(wp.first == it) { found_end_word=it; - i=wp.second; + ctx.i=wp.second; stop=true; break; } @@ -983,25 +1182,23 @@ std::tuple parse_list_until(const char* in, uint32 if(stop) break; // do a parse - auto pp=parse_condlist(in, size, i); + auto pp=parse_condlist(ctx); ret->add(pp.first); - i=pp.second; - if(in[i] == '#') + ctx=pp.second; + if(ctx[ctx.i] == '#') ; // skip here - else if(is_in(in[i], COMMAND_SEPARATOR)) - i++; // skip on next + else if(is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + ctx.i++; // skip on next - i = skip_unread(in, size, i); + ctx.i = skip_unread(ctx); // word wasn't found - if(i>=size) + if(ctx.i>=ctx.size) { - if(expecting!=NULL) - throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); - else - throw PARSE_ERROR(strf("Expecting '%s'", end_words[0].c_str()), start-1); + parse_error(strf("Expecting '%s'", ctx.expecting), ctx); + return std::make_tuple(ret, ctx, ""); } } - g_expecting=old_expect; + ctx.expecting=old_expect; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1010,27 +1207,31 @@ std::tuple parse_list_until(const char* in, uint32 throw e; } #endif - return std::make_tuple(ret, i, found_end_word); + return std::make_tuple(ret, ctx, found_end_word); } // parse a subshell // must start right after the opening ( // ends at ) and nothing else -std::pair parse_subshell(const char* in, uint32_t size, uint32_t start) +std::pair parse_subshell(parse_context ctx) { - uint32_t i = skip_unread(in, size, start); subshell* ret = new subshell; + uint32_t start=ctx.i; + ctx.i = skip_unread(ctx); #ifndef NO_PARSE_CATCH try { #endif - auto pp=parse_list_until(in, size, start, ')'); +; + auto pp=parse_list_until(ctx, ')', ")"); ret->lst=pp.first; - i=pp.second; + ctx=pp.second; if(ret->lst->size()<=0) - throw PARSE_ERROR("Subshell is empty", start-1); - i++; + { + parse_error("Subshell is empty", ctx, start-1); + } + ctx.i++; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1039,28 +1240,33 @@ std::pair parse_subshell(const char* in, uint32_t size, uin throw e; } #endif - return std::make_pair(ret,i); + return std::make_pair(ret,ctx); } // parse a brace block // must start right after the opening { // ends at } and nothing else -std::pair parse_brace(const char* in, uint32_t size, uint32_t start) +std::pair parse_brace(parse_context ctx) { - uint32_t i = skip_unread(in, size, start); brace* ret = new brace; + uint32_t start=ctx.i; + ctx.i = skip_unread(ctx); #ifndef NO_PARSE_CATCH try { #endif - auto pp=parse_list_until(in, size, start, '}'); +; + auto pp=parse_list_until(ctx, '}', "}"); ret->lst=pp.first; - i=pp.second; + ctx=pp.second; if(ret->lst->size()<=0) - throw PARSE_ERROR("Brace block is empty", start-1); - i++; + { + parse_error("Brace block is empty", ctx, start-1); + return std::make_pair(ret, ctx+1); + } + ctx.i++; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1070,33 +1276,40 @@ std::pair parse_brace(const char* in, uint32_t size, uint32_t } #endif - return std::make_pair(ret,i); + return std::make_pair(ret,ctx); } // parse a function // must start right after the () // then parses a brace block -std::pair parse_function(const char* in, uint32_t size, uint32_t start, const char* after) +std::pair parse_function(parse_context ctx, const char* after) { - uint32_t i=start; function* ret = new function; #ifndef NO_PARSE_CATCH try { #endif - i=skip_unread(in, size, i); - if(in[i] != '{') - throw PARSE_ERROR( strf("Expecting { after %s", after) , i); - i++; +; + ctx.i=skip_unread(ctx); + if(ctx[ctx.i] != '{') + { + parse_error( strf("Expecting { after %s", after) , ctx); + return std::make_pair(ret, ctx); + } + ctx.i++; - auto pp=parse_list_until(in, size, i, '}'); + auto pp=parse_list_until(ctx, '}', "}"); if(pp.first->size()<=0) - throw PARSE_ERROR("Function is empty", i); + { + parse_error("Function is empty", ctx); + ctx.i=pp.second.i+1; + return std::make_pair(ret, ctx); + } ret->lst=pp.first; - i=pp.second; - i++; + ctx=pp.second; + ctx.i++; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1106,13 +1319,12 @@ std::pair parse_function(const char* in, uint32_t size, uin } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse only var assigns -uint32_t parse_cmd_varassigns(cmd* ret, const char* in, uint32_t size, uint32_t start, bool cmdassign=false, std::string const& cmd="") +parse_context parse_cmd_varassigns(cmd* ret, parse_context ctx, bool cmdassign=false, std::string const& cmd="") { - uint32_t i=start; bool forbid_assign=false; bool forbid_special=false; if(cmdassign && (cmd == "read" || cmd == "unset") ) @@ -1120,70 +1332,88 @@ uint32_t parse_cmd_varassigns(cmd* ret, const char* in, uint32_t size, uint32_t if(cmdassign && (forbid_special || cmd == "export") ) forbid_special=true; - while(idefinition=true; - if(vp.first != nullptr && vp.secondinsert(0,"("); ta->add(")"); - i=pp.second+1; + ctx = pp.second; + ctx.i++; } - else if( is_in(in[i], ARG_END) ) // no value : give empty value + else if( is_in(ctx[ctx.i], ARG_END) ) // no value : give empty value { ta = new arg; } else { - auto pp=parse_arg(in, size, i); + auto pp=parse_arg(ctx); ta=pp.first; - i=pp.second; + ctx=pp.second; } ta->insert(0, strop); ret->var_assigns.push_back(std::make_pair(vp.first, ta)); - i=skip_chars(in, size, i, SPACES); + ctx.i=skip_chars(ctx, SPACES); } else { if(cmdassign) { - if(vp.first != nullptr && is_in(in[vp.second], ARG_END) ) + if(vp.first != nullptr && is_in(newct[newct.i], ARG_END) ) { ret->var_assigns.push_back(std::make_pair(vp.first, nullptr)); - i=vp.second; + ctx=newct; } else { delete vp.first; - auto pp=parse_arg(in, size, i); + auto pp=parse_arg(ctx); ret->var_assigns.push_back(std::make_pair(nullptr, pp.first)); - i=pp.second; + ctx=pp.second; } - i=skip_chars(in, size, i, SPACES); + ctx.i=skip_chars(ctx, SPACES); } else { @@ -1193,46 +1423,54 @@ uint32_t parse_cmd_varassigns(cmd* ret, const char* in, uint32_t size, uint32_t } } } - return i; + return ctx; } // must start at read char -std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) +std::pair parse_cmd(parse_context ctx) { cmd* ret = new cmd; - uint32_t i=start; + uint32_t start=ctx.i; #ifndef NO_PARSE_CATCH try { #endif ; - i=parse_cmd_varassigns(ret, in, size, i); + ctx = parse_cmd_varassigns(ret, ctx); - auto wp=get_word(in, size, i, ARG_END); + auto wp=get_word(ctx, ARG_END); if(is_in_vector(wp.first, posix_cmdvar) || is_in_vector(wp.first, bash_cmdvar)) { - if(!g_bash && is_in_vector(wp.first, bash_cmdvar)) - throw PARSE_ERROR("bash specific: "+wp.first, i); + if(!ctx.bash && is_in_vector(wp.first, bash_cmdvar)) + { + parse_error("bash specific: "+wp.first, ctx); + } if(ret->var_assigns.size()>0) - throw PARSE_ERROR("Unallowed preceding variables on "+wp.first, start); + { + parse_error("Unallowed preceding variables on "+wp.first, ctx, start); + } ret->args = new arglist; ret->args->add(new arg(wp.first)); ret->is_cmdvar=true; - i=skip_chars(in, size, wp.second, SPACES); + ctx.i = wp.second; + ctx.i = skip_chars(ctx, SPACES); - i=parse_cmd_varassigns(ret, in, size, i, true, wp.first); + ctx = parse_cmd_varassigns(ret, ctx, true, wp.first); } - if(!is_in(in[i], SPECIAL_TOKENS)) + if(!is_in(ctx[ctx.i], SPECIAL_TOKENS)) { - auto pp=parse_arglist(in, size, i, true, &ret->redirs); + auto pp=parse_arglist(ctx, true, &ret->redirs); ret->args = pp.first; - i = pp.second; + ctx = pp.second; } else if(ret->var_assigns.size() <= 0) - throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i]), i ); + { + parse_error( unexpected_token(ctx[ctx.i]), ctx ); + ctx.i++; + } #ifndef NO_PARSE_CATCH } @@ -1243,37 +1481,39 @@ std::pair parse_cmd(const char* in, uint32_t size, uint32_t star } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse a case block // must start right after the case // ends at } and nothing else -std::pair parse_case(const char* in, uint32_t size, uint32_t start) +std::pair parse_case(parse_context ctx) { - uint32_t i=skip_chars(in, size, start, SPACES);; case_block* ret = new case_block; + ctx.i=skip_chars(ctx, SPACES); #ifndef NO_PARSE_CATCH try { #endif +; // get the treated argument - auto pa = parse_arg(in, size, i); + auto pa = parse_arg(ctx); ret->carg = pa.first; - i=skip_unread(in, size, pa.second); + ctx=pa.second; + ctx.i=skip_unread(ctx); // must be an 'in' - if(!word_eq("in", in, size, i, SEPARATORS)) + if(!word_eq("in", ctx, SEPARATORS)) { - std::string pp=get_word(in, size, i, SEPARATORS).first; - throw PARSE_ERROR( strf("Unexpected word: '%s', expecting 'in' after case", pp.c_str()), i); + std::string word=get_word(ctx, SEPARATORS).first; + parse_error( strf("Unexpected word: '%s', expecting 'in' after case", word.c_str()), ctx); } - - i=skip_unread(in, size, i+2); + ctx.i+=2; + ctx.i=skip_unread(ctx); // parse all cases - while(icases.push_back( std::make_pair(std::vector(), nullptr) ); @@ -1283,43 +1523,61 @@ std::pair parse_case(const char* in, uint32_t size, uint3 // toto) while(true) { - pa = parse_arg(in, size, i); + pa = parse_arg(ctx); cc->first.push_back(pa.first); + ctx = pa.second; if(pa.first->size() <= 0) - throw PARSE_ERROR("Empty case value", i); - i=skip_unread(in, size, pa.second); - if(i>=size) - throw PARSE_ERROR("Unexpected end of file. Expecting 'esac'", i); - if(in[i] == ')') + { + parse_error("Empty case value", ctx); + } + ctx.i = skip_unread(ctx); + if(ctx.i>=ctx.size) + { + parse_error("Unexpected end of file. Expecting 'esac'", ctx); + return std::make_pair(ret, ctx); + } + if(ctx[ctx.i] == ')') break; - if(in[i] != '|' && is_in(in[i], SPECIAL_TOKENS)) - throw PARSE_ERROR( strf("Unexpected token '%c', expecting ')'", in[i]), i ); - i=skip_unread(in, size, i+1); + if(ctx[ctx.i] != '|' && is_in(ctx[ctx.i], SPECIAL_TOKENS)) + { + parse_error( unexpected_token(ctx[ctx.i])+", expecting ')'", ctx ); + } + // | + ctx.i++; + ctx.i=skip_unread(ctx); } - i++; + ctx.i++; // until ;; - auto tp = parse_list_until(in, size, i, {";", "esac"}, ";;"); + auto tp = parse_list_until(ctx, {";", "esac"}, ";;"); cc->second = std::get<0>(tp); - i = std::get<1>(tp); + ctx = std::get<1>(tp); std::string word = std::get<2>(tp); if(word == "esac") { - i -= 4; + ctx.i -= 4; break; } - if(i >= size) - throw PARSE_ERROR("Expecting ';;'", i); - if(in[i-1] != ';') - throw PARSE_ERROR("Unexpected token: ';'", i); + if(ctx.i >= ctx.size) + { + parse_error("Expecting ';;'", ctx); + } + if(ctx[ctx.i-1] != ';') + { + parse_error("Unexpected token ';'", ctx); + } - i=skip_unread(in, size, i+1); + ctx.i++; + ctx.i=skip_unread(ctx); } // ended before finding esac - if(i>=size) - throw PARSE_ERROR("Expecting 'esac'", i); - i+=4; + if(ctx.i>=ctx.size) + { + parse_error("Expecting 'esac'", ctx); + return std::make_pair(ret, ctx); + } + ctx.i+=4; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1329,18 +1587,18 @@ std::pair parse_case(const char* in, uint32_t size, uint3 } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_if(const char* in, uint32_t size, uint32_t start) +std::pair parse_if(parse_context ctx) { if_block* ret = new if_block; - uint32_t i=start; #ifndef NO_PARSE_CATCH try { #endif +; while(true) { std::string word; @@ -1348,28 +1606,38 @@ std::pair parse_if(const char* in, uint32_t size, uint32_t ret->blocks.push_back(std::make_pair(nullptr, nullptr)); auto ll = ret->blocks.end()-1; - auto pp=parse_list_until(in, size, i, "then"); + auto pp=parse_list_until(ctx, "then"); ll->first = pp.first; - i = pp.second; if(ll->first->size()<=0) - throw PARSE_ERROR("Condition is empty", i); + { + parse_error("Condition is empty", ctx); + pp.second.has_errored=true; + } + ctx = pp.second; - auto tp=parse_list_until(in, size, i, {"fi", "elif", "else"}); + auto tp=parse_list_until(ctx, {"fi", "elif", "else"}); ll->second = std::get<0>(tp); - i = std::get<1>(tp); + parse_context newctx = std::get<1>(tp); word = std::get<2>(tp); - if(std::get<0>(tp)->size() <= 0) - throw PARSE_ERROR("if block is empty", i); + if(ll->second->size() <= 0) + { + parse_error("if block is empty", ctx); + newctx.has_errored=true; + } + ctx = newctx; if(word == "fi") break; if(word == "else") { - auto pp=parse_list_until(in, size, i, "fi"); + auto pp=parse_list_until(ctx, "fi"); if(pp.first->size()<=0) - throw PARSE_ERROR("else block is empty", i); + { + parse_error("else block is empty", ctx); + pp.second.has_errored=true; + } ret->else_lst=pp.first; - i=pp.second; + ctx=pp.second; break; } @@ -1384,54 +1652,73 @@ std::pair parse_if(const char* in, uint32_t size, uint32_t } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_for(const char* in, uint32_t size, uint32_t start) +std::pair parse_for(parse_context ctx) { for_block* ret = new for_block; - uint32_t i=skip_chars(in, size, start, SPACES); + ctx.i = skip_chars(ctx, SPACES); #ifndef NO_PARSE_CATCH try { #endif - auto wp = get_word(in, size, i, ARG_END); +; + auto wp = get_word(ctx, ARG_END); if(!valid_name(wp.first)) - throw PARSE_ERROR( strf("Bad identifier in for clause: '%s'", wp.first.c_str()), i ); + { + parse_error( strf("Bad variable name in for clause: '%s'", wp.first.c_str()), ctx ); + } ret->var = new variable(wp.first, nullptr, true); - i=skip_chars(in, size, wp.second, SPACES); + ctx.i = wp.second; + ctx.i=skip_chars(ctx, SPACES); // in - wp = get_word(in, size, i, ARG_END); + wp = get_word(ctx, ARG_END); if(wp.first == "in") { - i=skip_chars(in, size, wp.second, SPACES); - auto pp = parse_arglist(in, size, i, false); + ctx.i=wp.second; + ctx.i=skip_chars(ctx, SPACES); + auto pp = parse_arglist(ctx, false); ret->iter = pp.first; - i = pp.second; + ctx = pp.second; } else if(wp.first != "") - throw PARSE_ERROR( "Expecting 'in' after for", i ); + { + parse_error( "Expecting 'in' after for", ctx ); + ctx.i=wp.second; + ctx.i=skip_chars(ctx, SPACES); + } // end of arg list - if(!is_in(in[i], "\n;#")) - throw PARSE_ERROR( strf("Unexpected token '%c', expecting '\\n' or ';'", in[i]), i ); - if(in[i] == ';') - i++; - i=skip_unread(in, size, i); + if(!is_in(ctx[ctx.i], "\n;#")) + { + parse_error( unexpected_token(ctx[ctx.i])+", expecting '\\n' or ';'", ctx ); + while(!is_in(ctx[ctx.i], "\n;#")) + ctx.i++; + } + if(ctx[ctx.i] == ';') + ctx.i++; + ctx.i=skip_unread(ctx); // do - wp = get_word(in, size, i, ARG_END); + wp = get_word(ctx, ARG_END); if(wp.first != "do") - throw PARSE_ERROR( "Expecting 'do', after for", i); - i=skip_unread(in, size, wp.second); + { + parse_error( "Expecting 'do', after for", ctx); + } + else + { + ctx.i = wp.second; + ctx.i = skip_unread(ctx); + } // ops - auto lp = parse_list_until(in, size, i, "done"); + auto lp = parse_list_until(ctx, "done"); ret->ops=lp.first; - i=lp.second; + ctx=lp.second; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1441,32 +1728,37 @@ std::pair parse_for(const char* in, uint32_t size, uint32_ } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_while(const char* in, uint32_t size, uint32_t start) +std::pair parse_while(parse_context ctx) { while_block* ret = new while_block; - uint32_t i=start; #ifndef NO_PARSE_CATCH try { #endif +; // cond - auto pp=parse_list_until(in, size, i, "do"); + auto pp=parse_list_until(ctx, "do"); + if(pp.first->size() <= 0) + { + parse_error("condition is empty", ctx); + pp.second.has_errored=true; + } ret->cond = pp.first; - i = pp.second; - if(ret->cond->size() <= 0) - throw PARSE_ERROR("condition is empty", i); - + ctx = pp.second; // ops - auto lp = parse_list_until(in, size, i, "done"); + auto lp = parse_list_until(ctx, "done"); + if(lp.first->size() <= 0) + { + parse_error("while is empty", ctx); + lp.second.has_errored=true; + } ret->ops=lp.first; - i = lp.second; - if(ret->ops->size() <= 0) - throw PARSE_ERROR("while is empty", i); + ctx = lp.second; #ifndef NO_PARSE_CATCH } catch(format_error& e) @@ -1476,122 +1768,145 @@ std::pair parse_while(const char* in, uint32_t size, uin } #endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // detect if brace, subshell, case or other -std::pair parse_block(const char* in, uint32_t size, uint32_t start) +std::pair parse_block(parse_context ctx) { - uint32_t i = skip_chars(in, size, start, SEPARATORS); + ctx.i = skip_chars(ctx, SEPARATORS); block* ret = nullptr; #ifndef NO_PARSE_CATCH try { #endif - if(i>=size) - throw PARSE_ERROR("Unexpected end of file", i); - if( in[i] == '(' ) //subshell +; + if(ctx.i>=ctx.size) { - auto pp = parse_subshell(in, size, i+1); + parse_error("Unexpected end of file", ctx); + return std::make_pair(ret, ctx); + } + if( ctx.data[ctx.i] == '(' ) //subshell + { + ctx.i++; + auto pp = parse_subshell(ctx); ret = pp.first; - i = pp.second; + ctx = pp.second; } else { - auto wp=get_word(in, size, i, BLOCK_TOKEN_END); - std::string word=wp.first; + auto wp=get_word(ctx, BLOCK_TOKEN_END); + std::string& word=wp.first; + parse_context newct=ctx; + newct.i=wp.second; // reserved words if( word == "{" ) // brace block { - auto pp = parse_brace(in, size, wp.second); + auto pp = parse_brace(newct); ret = pp.first; - i = pp.second; + ctx = pp.second; } else if(word == "case") // case { - auto pp = parse_case(in, size, wp.second); + auto pp = parse_case(newct); ret = pp.first; - i = pp.second; + ctx = pp.second; } else if( word == "if" ) // if { - auto pp=parse_if(in, size, wp.second); + auto pp=parse_if(newct); ret = pp.first; - i = pp.second; + ctx = pp.second; } else if( word == "for" ) { - auto pp=parse_for(in, size, wp.second); + auto pp=parse_for(newct); ret = pp.first; - i = pp.second; + ctx = pp.second; } else if( word == "while" ) { - auto pp=parse_while(in, size, wp.second); + auto pp=parse_while(newct); ret = pp.first; - i = pp.second; + ctx = pp.second; } else if( word == "until" ) { - auto pp=parse_while(in, size, wp.second); + auto pp=parse_while(newct); pp.first->real_condition()->negate(); ret = pp.first; - i = pp.second; + ctx = pp.second; } else if(is_in_vector(word, out_reserved_words)) // is a reserved word { - throw PARSE_ERROR( "Unexpected '"+word+"'" + expecting(g_expecting) , i); + parse_error( strf("Unexpected '%s'", word.c_str())+expecting(ctx.expecting) , ctx); } // end reserved words else if( word == "function" ) // bash style function { - if(!g_bash) - throw PARSE_ERROR("bash specific: 'function'", i); - auto wp2=get_word(in, size, skip_unread(in, size, wp.second), VARNAME_END); + if(!ctx.bash) + { + parse_error("bash specific: 'function'", ctx); + newct.has_errored=true; + } + newct.i = skip_unread(newct); + auto wp2=get_word(newct, VARNAME_END); if(!valid_name(wp2.first)) - throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); + { + parse_error( strf("Bad function name: '%s'", word.c_str()), newct ); + } - i=skip_unread(in, size, wp2.second); - if(word_eq("()", in, size, i)) - i=skip_unread(in, size, i+2); + newct.i = wp2.second; + newct.i=skip_unread(newct); + if(word_eq("()", newct)) + { + newct.i+=2; + newct.i=skip_unread(newct); + } - auto pp = parse_function(in, size, i, "function definition"); + auto pp = parse_function(newct, "function definition"); // function name pp.first->name = wp2.first; ret = pp.first; - i = pp.second; + ctx = pp.second; } - else if(word_eq("()", in, size, skip_unread(in, size, wp.second))) // is a function + else if(word_eq("()", ctx.data, ctx.size, skip_unread(ctx.data, ctx.size, wp.second))) // is a function { if(!valid_name(word)) - throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); + { + parse_error( strf("Bad function name: '%s'", word.c_str()), ctx ); + newct.has_errored=true; + } - auto pp = parse_function(in, size, skip_unread(in, size, wp.second)+2); + newct.i = skip_unread(ctx.data, ctx.size, wp.second)+2; + auto pp = parse_function(newct); // first arg is function name pp.first->name = word; ret = pp.first; - i = pp.second; + ctx = pp.second; } else // is a command { - auto pp = parse_cmd(in, size, i); + auto pp = parse_cmd(ctx); ret = pp.first; - i = pp.second; + ctx = pp.second; } } if(ret->type != block::block_cmd) { - uint32_t j=skip_chars(in, size, i, SPACES); - auto pp=parse_arglist(in, size, j, false, &ret->redirs); // in case of redirects + uint32_t j=skip_chars(ctx, SPACES); + ctx.i=j; + auto pp=parse_arglist(ctx, false, &ret->redirs); // in case of redirects if(pp.first != nullptr) { delete pp.first; - throw PARSE_ERROR("Extra argument after block", i); + parse_error("Extra argument after block", ctx); + pp.second.has_errored=true; } - i=pp.second; + ctx=pp.second; } #ifndef NO_PARSE_CATCH } @@ -1602,42 +1917,39 @@ std::pair parse_block(const char* in, uint32_t size, uint32_t } #endif - return std::make_pair(ret,i); + return std::make_pair(ret,ctx); } // parse main -shmain* parse_text(const char* in, uint32_t size, std::string const& filename) +std::pair parse_text(parse_context ctx) { shmain* ret = new shmain(); - uint32_t i=0; -#ifndef NO_PARSE_CATCH - try + + ret->filename=ctx.filename; + // get shebang + if(word_eq("#!", ctx)) { -#endif - ret->filename=filename; - // get shebang - if(word_eq("#!", in, size, 0)) - { - i=skip_until(in, size, 0, "\n"); - ret->shebang=std::string(in, i); - } - i = skip_unread(in, size, i); - // do bash reading - std::string binshebang = basename(ret->shebang); - g_bash = binshebang == "bash" || binshebang == "lxsh"; - // parse all commands - auto pp=parse_list_until(in, size, i, 0); - ret->lst=pp.first; - i=pp.second; -#ifndef NO_PARSE_CATCH + ctx.i=skip_until(ctx, "\n"); + ret->shebang=std::string(ctx.data, ctx.i); } - catch(format_error& e) - { - delete ret; - throw format_error(e.what(), filename, e.data(), e.where()); - } -#endif - return ret; + ctx.i = skip_unread(ctx); + // do bash reading + std::string binshebang = basename(ret->shebang); + if(!ctx.bash) + ctx.bash = (binshebang == "bash" || binshebang == "lxsh"); + // parse all commands + auto pp=parse_list_until(ctx, 0); + ret->lst=pp.first; + + if(pp.second.has_errored) + throw std::runtime_error("Aborted due to previous errors"); + + return std::make_pair(ret, pp.second); +} + +std::pair parse_text(std::string const& in, std::string const& filename) +{ + return parse_text({ .data=in.c_str(), .size=in.size(), .filename=filename.c_str()}); } // import a file's contents into a string diff --git a/src/processing.cpp b/src/processing.cpp index 9a7edbd..91a59ff 100644 --- a/src/processing.cpp +++ b/src/processing.cpp @@ -431,7 +431,7 @@ std::set get_processors(std::string const& in) if(ln.size()>1 && ln[0] == '#' && is_alphanum(ln[1])) { i+=ln.size(); - ret.insert(get_word(ln.c_str(), ln.size(), 1, SEPARATORS).first); + ret.insert(get_word(make_context(ln.substr(1)), SEPARATORS).first); } else break; @@ -450,7 +450,8 @@ bool r_do_string_processor(_obj* in) { try { - shmain* tsh = parse_text(t->val.substr(1, t->val.size()-2)); + std::string stringcode = t->val.substr(1, t->val.size()-2); + shmain* tsh = parse_text( stringcode ).first; require_rescan_all(); if(options["remove-unused"]) delete_unused( tsh, re_var_exclude, re_fct_exclude ); diff --git a/src/resolve.cpp b/src/resolve.cpp index a9033a8..e1c4026 100644 --- a/src/resolve.cpp +++ b/src/resolve.cpp @@ -59,7 +59,7 @@ void _cd(std::string const& dir) // -- COMMANDS -- // return [] -std::vector> do_include_raw(condlist* cmd, std::string const& filename, std::string* ex_dir) +std::vector> do_include_raw(condlist* cmd, parse_context ctx, std::string* ex_dir) { std::vector> ret; @@ -77,7 +77,7 @@ std::vector> do_include_raw(condlist* cmd, s std::string dir; if(g_cd && !opts['C']) { - dir=_pre_cd(filename); + dir=_pre_cd(ctx.filename); if(ex_dir!=nullptr) *ex_dir=dir; } @@ -105,7 +105,7 @@ std::vector> do_include_raw(condlist* cmd, s } // -std::pair do_resolve_raw(condlist* cmd, std::string const& filename, std::string* ex_dir) +std::pair do_resolve_raw(condlist* cmd, parse_context ctx, std::string* ex_dir) { std::pair ret; @@ -123,7 +123,7 @@ std::pair do_resolve_raw(condlist* cmd, std::string co std::string dir; if(g_cd && !opts['C']) { - dir=_pre_cd(filename); + dir=_pre_cd(ctx.filename); if(ex_dir!=nullptr) *ex_dir=dir; } @@ -152,23 +152,33 @@ std::pair do_resolve_raw(condlist* cmd, std::string co return ret; } -std::vector do_include_parse(condlist* cmd, std::string const& filename) +std::vector do_include_parse(condlist* cmd, parse_context ctx) { std::vector ret; std::string dir; - auto incs=do_include_raw(cmd, filename, &dir); + auto incs=do_include_raw(cmd, ctx, &dir); - for(auto it: incs) + std::vector shs; + shs.resize(incs.size()); + + for(uint32_t i=0; ilst->cls.begin(), sh->lst->cls.end()); // safety and cleanup sh->lst->cls.resize(0); delete sh; } + shs.resize(0); // cd back _cd(dir); @@ -176,7 +186,7 @@ std::vector do_include_parse(condlist* cmd, std::string const& filena } // if first is nullptr: is a string -std::vector do_resolve_parse(condlist* cmd, std::string const& filename) +std::vector do_resolve_parse(condlist* cmd, parse_context ctx) { std::vector ret; @@ -185,10 +195,13 @@ std::vector do_resolve_parse(condlist* cmd, std::string const& filena { // get std::string dir; - p=do_resolve_raw(cmd, filename, &dir); + p=do_resolve_raw(cmd, ctx, &dir); + // do parse - shmain* sh = parse_text(p.second); - resolve(sh); + parse_context newctx = make_context(ctx, p.second, '`'+p.first+'`'); + auto pp = parse_text(newctx); + shmain* sh = pp.first; + resolve(sh, pp.second); // get the cls ret = sh->lst->cls; // safety and cleanup @@ -207,7 +220,7 @@ std::vector do_resolve_parse(condlist* cmd, std::string const& filena // -- OBJECT CALLS -- -std::pair< std::vector , bool > resolve_condlist(condlist* in, std::string const& filename) +std::pair< std::vector , bool > resolve_condlist(condlist* in, parse_context ctx) { cmd* tc = in->first_cmd(); if(tc == nullptr) @@ -216,14 +229,14 @@ std::pair< std::vector , bool > resolve_condlist(condlist* in, std::s std::string const& strcmd=tc->arg_string(0); if(g_include && strcmd == "%include") - return std::make_pair(do_include_parse(in, filename), true); + return std::make_pair(do_include_parse(in, ctx), true); else if(g_resolve && strcmd == "%resolve") - return std::make_pair(do_resolve_parse(in, filename), true); + return std::make_pair(do_resolve_parse(in, ctx), true); else return std::make_pair(std::vector(), false); } -std::pair< std::vector , bool > resolve_arg(arg* in, std::string const& filename, bool forcequote=false) +std::pair< std::vector , bool > resolve_arg(arg* in, parse_context ctx, bool forcequote=false) { std::vector ret; if(in == nullptr) @@ -249,12 +262,12 @@ std::pair< std::vector , bool > resolve_arg(arg* in, std::string const& fi std::string fulltext; if(g_include && strcmd == "%include") { - for(auto it: do_include_raw(tc, filename) ) + for(auto it: do_include_raw(tc, ctx) ) fulltext += it.second; } else if(g_resolve && strcmd == "%resolve") { - fulltext = do_resolve_raw(tc, filename).second; + fulltext = do_resolve_raw(tc, ctx).second; } else // skip continue; @@ -320,10 +333,10 @@ std::pair< std::vector , bool > resolve_arg(arg* in, std::string const& fi return std::make_pair(ret, has_resolved); } - +void resolve(_obj* in, parse_context* ctx); // -- RECURSIVE CALL -- -bool r_resolve(_obj* o, std::string* filename) +bool r_resolve(_obj* o, parse_context* ct) { switch(o->type) { @@ -336,7 +349,7 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(uint32_t i=0 ; icls.size() ; i++) { - auto r=resolve_condlist(t->cls[i], *filename); + auto r=resolve_condlist(t->cls[i], *ct); if(r.second) { // add new cls after current @@ -349,7 +362,7 @@ bool r_resolve(_obj* o, std::string* filename) } else { - resolve(t->cls[i], filename); + resolve(t->cls[i], ct); } } return false; @@ -359,7 +372,7 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(uint32_t i=0 ; isize() ; i++) { - auto r=resolve_arg(t->args[i], *filename); + auto r=resolve_arg(t->args[i], *ct); if(r.first.size()>0) { // add new args @@ -371,7 +384,7 @@ bool r_resolve(_obj* o, std::string* filename) } else { - resolve(t->args[i], filename); + resolve(t->args[i], ct); } } return false; @@ -381,12 +394,12 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(auto it: t->var_assigns) // var assigns { - resolve_arg(it.second, *filename, true); // force quoted - resolve(it.second, filename); + resolve_arg(it.second, *ct, true); // force quoted + resolve(it.second, ct); } for(auto it: t->redirs) - resolve(it, filename); - resolve(t->args, filename); + resolve(it, ct); + resolve(t->args, ct); return false; }; break; case _obj::block_case : @@ -394,15 +407,15 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(auto sc: t->cases) { - resolve_arg(t->carg, *filename, true); // force quoted - resolve(t->carg, filename); + resolve_arg(t->carg, *ct, true); // force quoted + resolve(t->carg, ct); for(auto it: sc.first) { - resolve_arg(it, *filename, true); // force quoted - resolve(it, filename); + resolve_arg(it, *ct, true); // force quoted + resolve(it, ct); } - resolve(sc.second, filename); + resolve(sc.second, ct); } }; break; default: break; @@ -411,12 +424,13 @@ bool r_resolve(_obj* o, std::string* filename) } // recursive call of resolve -void resolve(_obj* in, std::string* filename) +void resolve(_obj* in, parse_context* ctx) { - recurse(r_resolve, in, filename); + recurse(r_resolve, in, ctx); } -void resolve(shmain* sh) +// recursive call of resolve +void resolve(_obj* in, parse_context ctx) { - recurse(r_resolve, sh, &sh->filename); + recurse(r_resolve, in, &ctx); } diff --git a/src/struc_helper.cpp b/src/struc_helper.cpp index 68b5cc2..335575b 100644 --- a/src/struc_helper.cpp +++ b/src/struc_helper.cpp @@ -9,7 +9,7 @@ arg* make_arg(std::string const& in) { - return parse_arg(in.c_str(), in.size(), 0).first; + return parse_arg(make_context(in)).first; } cmd* make_cmd(std::vector const& args) @@ -34,7 +34,7 @@ cmd* make_cmd(std::vector const& args) cmd* make_cmd(std::string const& in) { - return parse_cmd(in.c_str(), in.size(), 0).first; + return parse_cmd(make_context(in)).first; } pipeline* make_pipeline(std::vector const& bls) @@ -48,22 +48,22 @@ pipeline* make_pipeline(std::vector const& bls) pipeline* make_pipeline(std::string const& in) { - return parse_pipeline(in.c_str(), in.size(), 0).first; + return parse_pipeline(make_context(in)).first; } condlist* make_condlist(std::string const& in) { - return parse_condlist(in.c_str(), in.size(), 0).first; + return parse_condlist(make_context(in)).first; } list* make_list(std::string const& in) { - return parse_list_until(in.c_str(), in.size(), 0, 0).first; + return parse_list_until(make_context(in), 0).first; } block* make_block(std::string const& in) { - return parse_block(in.c_str(), in.size(), 0).first; + return parse_block(make_context(in)).first; } @@ -71,7 +71,7 @@ block* make_block(std::string const& in) arg* copy(arg* in) { std::string str = in->generate(0); - return parse_arg(str.c_str(), str.size(), 0).first; + return parse_arg(make_context(str)).first; } // modifiers