diff --git a/include/parse.hpp b/include/parse.hpp index 2bd9b2f..edfc9e2 100644 --- a/include/parse.hpp +++ b/include/parse.hpp @@ -11,7 +11,7 @@ extern std::string g_origin; std::string import_file(std::string const& path); -block parse(const char* in, uint32_t size); -inline block parse(std::string const& in) { return parse(in.c_str(), in.size()); } +shmain* parse(const char* in, uint32_t size); +inline shmain* parse(std::string const& in) { return parse(in.c_str(), in.size()); } #endif //PARSE_HPP diff --git a/include/struc.hpp b/include/struc.hpp index 72d1480..eb2145f 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -12,18 +11,37 @@ structure: list_t : condlist[] +arglist_t : arg[] -block: -- group - - brace: list_t - - subsh: list_t -- cmd: arglist[] +block: can be one of +- main + string (shebang) + list_t (commands) +- brace + list_t +- subshell + list_t +- cmd: arglist - case - - arg (input) - - pair[] (cases) + arg (input) + pair[] (cases) +- if + pair[] (blocks) + list_t (else) +- for + string (variable name) + arglist (iterations) + list_t (execution) +- while + list_t (condition) + list_t (execution) + condlist: pipeline[] + or_ops[] + > always one smaller than pipeline + > designates an OR if true, otherwise an AND pipeline: block[] @@ -31,13 +49,15 @@ pipeline: arglist: arg[] -arg: -- raw -- subarg[] split into subarguments in case of subshells +arg: has + raw + subarg[] can have multiple subarguments if string and subshells -subarg: -- raw +subarg: can be one of +- string - block: subshell (substitution) +- arithmetic + */ #define AND_OP false @@ -48,26 +68,42 @@ class block; class pipeline; class arg; class subarg; +class cmd; // type pack of condlist -typedef std::vector list_t; +typedef std::vector list_t; +typedef std::vector arglist_t; -block make_cmd(std::vector args); +cmd* make_cmd(std::vector args); bool add_include(std::string const& file); +// meta subarg type +class subarg +{ +public: + // type + enum argtype { s_string, s_subshell, s_arithmetic }; + argtype type; + + virtual ~subarg() {;} + + std::string generate(int ind); +}; + class arg { public: arg() { ; } arg(std::string const& str) {this->setstring(str);} + ~arg() { for(auto it: sa) delete it; } void setstring(std::string const& str); // has to be set manually std::string raw; - std::vector sa; + std::vector sa; // return if is a string and only one subarg std::string string(); @@ -75,111 +111,283 @@ public: std::string generate(int ind); }; +// arglist + class arglist { public: - inline void add(arg const& in) { args.push_back(in); } - inline void push_back(arg const& in) { args.push_back(in); } + ~arglist() { for( auto it: args ) delete it; } + inline void add(arg* in) { args.push_back(in); } + inline void push_back(arg* in) { args.push_back(in); } - std::vector args; + arglist_t args; std::vector strargs(uint32_t start); inline uint64_t size() { return args.size(); } - inline arg& operator[](uint32_t i) { return args[i]; } + inline arg* operator[](uint32_t i) { return args[i]; } std::string generate(int ind); }; -class redir -{ -public: - enum redirtype { none, write, append, read, raw } ; - redir(redirtype in=none) { type=in; } - redirtype type; - arg val; -}; -class block -{ -public: - // type - enum blocktype { none, subshell, brace, main, cmd, function, case_block, for_block, if_block, while_block}; - blocktype type; - - // ctor - block() { type=none; } - block(blocktype in) { type=in; } - - // subshell/brace/main - list_t cls; - - // cmd - arglist args; - - // case - arg carg; - std::vector< std::pair, list_t> > cases; - - // main: shebang - // function: name - std::string shebang; - - // subshell: return the containing cmd, if it is a single command - block* single_cmd(); - - std::string generate(int ind=0, bool print_shebang=true); - -private: - std::string generate_cmd(int ind); - std::string generate_case(int ind); -}; +// PL class pipeline { public: - pipeline() {;} - pipeline(block const& bl) { cmds.push_back(bl); } - inline void add(block bl) { this->cmds.push_back(bl); } - std::vector cmds; + pipeline() { negated=false; } + pipeline(block* bl) { cmds.push_back(bl); negated=false; } + inline void add(block* bl) { this->cmds.push_back(bl); } + std::vector cmds; + + bool negated; // negated return value (! at start) std::string generate(int ind); }; +// CL + class condlist { public: condlist() { parallel=false; } - condlist(block const& pl) { parallel=false; this->add(pl);} - condlist(pipeline const& pl) { parallel=false; this->add(pl);} + condlist(pipeline const& pl) { parallel=false; this->add(new pipeline(pl));} + condlist(pipeline* pl) { parallel=false; this->add(pl);} bool parallel; // & at the end - void add(pipeline const& pl, bool or_op=false); + void add(pipeline* pl, bool or_op=false); // don't push_back here, use add() instead - std::vector pls; + std::vector pls; std::vector or_ops; // size of 1 less than pls, defines separator between pipelines + void negate(); + + std::string generate(int ind, bool pre_indent=true); +}; + +// class redir +// { +// public: +// enum redirtype { none, write, append, read, raw } ; +// redir(redirtype in=none) { type=in; } +// redirtype type; +// arg val; +// }; + + +class cmd; + +// Meta block +class block +{ +public: + // type + enum blocktype { block_subshell, block_brace, block_main, block_cmd, block_function, block_case, block_if, block_for, block_while, block_until }; + blocktype type; + + // ctor + block() { redirs=nullptr; } + virtual ~block() { if(redirs!=nullptr) delete redirs; } + + // cmd + arglist* redirs; + + // subshell: return the containing cmd, if it is a single command + cmd* single_cmd(); + + std::string generate_redirs(int ind); + + virtual std::string generate(int ind)=0; +}; + +// block types + +class subshell : public block +{ +public: + subshell() { type=block::block_subshell; } + ~subshell() { for(auto it: cls) delete it; } + + cmd* single_cmd(); + + list_t cls; + + std::string generate(int ind); +}; +class brace : public block +{ +public: + brace() { type=block::block_brace; } + ~brace() { + if(redirs!=nullptr) delete redirs; + for(auto it: cls) delete it; } + + cmd* single_cmd(); + + list_t cls; + std::string generate(int ind); }; - -class subarg +class shmain : public block { public: - // type - enum argtype { string, subshell, arithmetic }; - argtype type; + shmain() { type=block::block_main; } + ~shmain() { + if(redirs!=nullptr) delete redirs; + for(auto it: cls) delete it; } - // ctor - subarg(argtype in) { this->type=in; } - subarg(std::string const& in="") { type=string; val=in; } - subarg(block const& in) { type=subshell; sbsh=in; } + std::string shebang; + list_t cls; + + std::string generate(bool print_shebang=true, int ind=0); + std::string generate(int ind); +}; + +class function : public block +{ +public: + function() { type=block::block_function; } + ~function() { + if(redirs!=nullptr) delete redirs; + for(auto it: cls) delete it; } + + std::string name; + list_t cls; + + std::string generate(int ind); +}; + +class cmd : public block +{ +public: + cmd(arglist* in=nullptr) { type=block::block_cmd; args=in; } + ~cmd() { + if(redirs!=nullptr) delete redirs; + if(args!=nullptr) delete args; } + + static const std::string empty_string; + + std::string const& firstarg_raw(); + + arglist* args; + + std::string generate(int ind); +}; + +class case_block : public block +{ +public: + case_block(arg* in=nullptr) { type=block::block_case; carg=in; } + ~case_block() { + if(redirs!=nullptr) delete redirs; + if(carg!=nullptr) delete carg; + for( auto cit : cases ) + { + for( auto ait : cit.first ) + delete ait; + for( auto lit : cit.second ) + delete lit; + } + } + + arg* carg; + std::vector< std::pair > cases; + + std::string generate(int ind); +}; + +class if_block : public block +{ +public: + if_block() { type=block::block_if; } + ~if_block() { + if(redirs!=nullptr) delete redirs; + for(auto it: else_cls) delete it; + for(auto ifb: blocks) + { + for(auto it: ifb.first) + delete it; + for(auto it: ifb.second) + delete it; + } + } + + std::vector< std::pair > blocks; + + list_t else_cls; + + std::string generate(int ind); +}; + +class for_block : public block +{ +public: + for_block(std::string const& name="", arglist* args=nullptr) { type=block::block_for; varname=name; iter=args; } + ~for_block() { + if(redirs!=nullptr) delete redirs; + if(iter!=nullptr) delete iter; + for(auto it: ops) delete it; + } + + std::string varname; + + arglist* iter; + list_t ops; + + std::string generate(int ind); +}; + +class while_block : public block +{ +public: + while_block() { type=block::block_while; } + ~while_block() { + if(redirs!=nullptr) delete redirs; + for(auto it: cond) delete it; + for(auto it: ops) delete it; + } + + condlist* real_condition() { return *(cond.end()-1); } + + list_t cond; + list_t ops; + + std::string generate(int ind); +}; + +// Subarg subtypes + +class subarg_string : public subarg +{ +public: + subarg_string(std::string const& in="") { type=subarg::s_string; val=in; } - // raw string std::string val; - // subshell - block sbsh; + + std::string generate(int ind) { return val; } +}; + +class subarg_arithmetic : public subarg +{ +public: + subarg_arithmetic() { type=subarg::s_arithmetic; } + + std::string val; + + std::string generate(int ind) { return "$(("+val+"))"; } +}; + +class subarg_subshell : public subarg +{ +public: + subarg_subshell(subshell* in=nullptr) { type=subarg::s_subshell; sbsh=in; } + subarg_subshell(subshell in) { type=subarg::s_subshell; sbsh=new subshell(in); } + ~subarg_subshell() { if(sbsh != nullptr) delete sbsh;} + + subshell* sbsh; std::string generate(int ind); }; diff --git a/src/generate.cpp b/src/generate.cpp index 40f6542..9dff0f4 100644 --- a/src/generate.cpp +++ b/src/generate.cpp @@ -15,12 +15,20 @@ bool is_sub_special_cmd(std::string in) return in == "%include_sub" || in == "%resolve_sub"; } +std::string indented(std::string const& in, uint32_t ind) +{ + if(!opt_minimize) + return indent(ind) + in; + else + return in; +} + std::string arg::generate(int ind) { std::string ret; for(auto it: sa) { - ret += it.generate(ind); + ret += it->generate(ind); } return ret; } @@ -31,7 +39,7 @@ std::string arglist::generate(int ind) for(auto it: args) { - ret += it.generate(ind); + ret += it->generate(ind); ret += ' '; } @@ -47,31 +55,33 @@ std::string pipeline::generate(int ind) if(cmds.size()<=0) return ""; - ret += cmds[0].generate(ind); + if(negated) + ret += "! "; + ret += cmds[0]->generate(ind); for(uint32_t i=1 ; igenerate(ind); } return ret; } -std::string condlist::generate(int ind) +std::string condlist::generate(int ind, bool pre_indent) { std::string ret; if(pls.size() <= 0) return ""; - if(!opt_minimize) - ret += INDENT; - ret += pls[0].generate(ind); + if(pre_indent) + ret += indented("", ind); + ret += pls[0]->generate(ind); for(uint32_t i=0 ; igenerate(ind); } if(ret=="") return ""; @@ -136,18 +146,18 @@ std::string generate_resolve(std::vector args, int ind) if(opts['p']) { - block bl; + shmain* sh; try { - bl = parse(p.first); + sh = parse(p.first); } catch(ztd::format_error& e) { throw ztd::format_error(e.what(), "command `"+cmd+'`', e.data(), e.where()); } - ret = bl.generate(ind, false); - std::string tmpind=INDENT; - ret = ret.substr(tmpind.size()); + ret = sh->generate(false, ind); + delete sh; + ret = ret.substr(indent(ind).size()); ret.pop_back(); // remove \n } else @@ -198,7 +208,7 @@ std::string generate_include(std::vector args, int ind) std::string file; - block bl; + shmain* bl=nullptr; bool indent_remove=true; for(auto it : v) @@ -224,12 +234,12 @@ std::string generate_include(std::vector args, int ind) { throw ztd::format_error(e.what(), it, e.data(), e.where()); } - file = bl.generate(ind, false); + file = bl->generate(false, ind); + delete bl; if(indent_remove) { indent_remove=false; - std::string tmpind=INDENT; - file = file.substr(tmpind.size()); + file = file.substr(indent(ind).size()); } ret += file; } @@ -246,161 +256,253 @@ std::string generate_include(std::vector args, int ind) return ret; } -std::string block::generate_cmd(int ind) +// BLOCK + +std::string block::generate_redirs(int ind) { std::string ret; - if(args.size()<=0) - return ""; - std::string cmd=args[0].raw; - if(cmd == "%include" || cmd == "%include_s") + if(redirs != nullptr) { - ret += generate_include(args.strargs(1), ind); - } - else if(cmd == "%resolve" || cmd == "%resolve_s") - { - ret += generate_resolve(args.strargs(1), ind); - } - else - ret = args.generate(ind); - return ret; -} - -std::string block::generate_case(int ind) -{ - std::string ret; - ret += "case " + carg.generate(ind) + " in\n"; - ind++; - for(auto cs: this->cases) - { - // case definition : foo) - if(!opt_minimize) ret += INDENT; - for(auto it: cs.first) - ret += it.generate(ind) + '|'; - ret.pop_back(); - ret += ')'; - if(!opt_minimize) ret += '\n'; - // commands - for(auto it: cs.second) - ret += it.generate(ind+1); - // end of case: ;; - if(opt_minimize) - { - // ;; can be right after command - if(ret[ret.size()-1] == '\n') - ret.pop_back(); - } - else - { - ind++; - ret += INDENT; - ind--; - } - ret += ";;\n"; - } - // close case - ind--; - if(!opt_minimize) ret += INDENT; - ret += "esac"; - return ret; -} - -std::string block::generate(int ind, bool print_shebang) -{ - std::string ret; - - if(type==cmd) - { - ret += generate_cmd(ind); - } - else - { - if(type==function) - { - // function definition - ret += shebang + "()"; - if(!opt_minimize) ret += '\n' + INDENT; - ret += "{\n"; - // commands - for(auto it: cls) - ret += it.generate(ind+1); - if(!opt_minimize) ret += INDENT; - // end function - ret += '}'; - } - else if(type==subshell) - { - // open subshell - ret += '('; - if(!opt_minimize) ret += '\n'; - // commands - for(auto it: cls) - ret += it.generate(ind+1); - if(opt_minimize && ret.size()>1) - ret.pop_back(); // ) can be right after command - else - ret += INDENT; - // close subshell - ret += ')'; - } - else if(type==brace) - { - ret += "{\n" ; - for(auto it: cls) - ret += it.generate(ind+1); - if(!opt_minimize) - ret += INDENT; - ret += '}'; - } - else if(type==main) - { - if(print_shebang && shebang!="") - ret += shebang + '\n'; - for(auto it: cls) - ret += it.generate(ind); - } - else if(type==case_block) - { - ret += generate_case(ind); - } - - std::string t = generate_cmd(ind); // leftover redirections + std::string t = redirs->generate(ind); if(t!="") { if(!opt_minimize) ret += ' '; ret += t; } - } + return ret; +} + +std::string if_block::generate(int ind) +{ + std::string ret; + + for(uint32_t i=0; igenerate(ind+1, false); + // other cmds: on new lines + for(uint32_t j=1; jgenerate(ind+1); + + // execution + ret += indented("then\n", ind); + for(auto it: blocks[i].second) + ret += it->generate(ind+1); + } + + if(else_cls.size()>0) + { + ret += indented("else\n", ind); + for(auto it: else_cls) + ret += it->generate(ind+1); + } + + ret += indented("fi", ind); + return ret; +} + +std::string for_block::generate(int ind) +{ + std::string ret; + + ret += "for "+varname; + if(iter != nullptr) + ret += " in " + iter->generate(ind); + ret += '\n'; + ret += indented("do\n", ind); + for(auto it: ops) + ret += it->generate(ind+1); + ret += indented("done", ind); return ret; } +std::string while_block::generate(int ind) +{ + std::string ret; + + ret += "while"; + if(cond.size() == 1) + { + ret += " " + cond[0]->generate(ind+1, false); + } + else + { + ret += '\n'; + for(auto it: cond) + ret += it->generate(ind+1); + } + ret += indented("do\n", ind); + for(auto it: ops) + ret += it->generate(ind+1); + ret += indented("done", ind); + + return ret; +} + +std::string subshell::generate(int ind) +{ + std::string ret; + // open subshell + ret += '('; + if(!opt_minimize) ret += '\n'; + // commands + for(auto it: cls) + ret += it->generate(ind+1); + if(opt_minimize && ret.size()>1) + ret.pop_back(); // ) can be right after command + // close subshell + ret += indented(")", ind); + + ret += generate_redirs(ind); + + return ret; +} + +std::string shmain::generate(int ind) +{ + return this->generate(false, ind); +} +std::string shmain::generate(bool print_shebang, int ind) +{ + std::string ret; + if(print_shebang && shebang!="") + ret += shebang + '\n'; + for(auto it: cls) + ret += it->generate(ind); + return ret; +} + +std::string brace::generate(int ind) +{ + std::string ret; + ret += "{\n" ; + for(auto it: cls) + ret += it->generate(ind+1); + + ret += indented("}", ind); + + ret += generate_redirs(ind); + + return ret; +} + +std::string function::generate(int ind) +{ + std::string ret; + // function definition + ret += name + "()"; + if(!opt_minimize) ret += '\n' + indent(ind); + ret += "{\n"; + // commands + for(auto it: cls) + ret += it->generate(ind+1); + ret += indented("}", ind); + + ret += generate_redirs(ind); + + return ret; +} + +std::string case_block::generate(int ind) +{ + std::string ret; + ret += "case " + carg->generate(ind) + " in\n"; + ind++; + for(auto cs: this->cases) + { + // case definition : foo) + ret += indented("", ind); + // args + for(auto it: cs.first) + ret += it->generate(ind) + '|'; + ret.pop_back(); + ret += ')'; + if(!opt_minimize) ret += '\n'; + // commands + for(auto it: cs.second) + ret += it->generate(ind+1); + // end of case: ;; + if(opt_minimize && ret[ret.size()-1] == '\n') // ;; can be right after command + { + ret.pop_back(); + } + ret += indented(";;\n", ind+1); + } + // close case + ind--; + ret += indented("esac", ind); + + ret += generate_redirs(ind); + + return ret; +} + +std::string cmd::generate(int ind) +{ + std::string ret; + if(args==nullptr || args->size()<=0) + return ""; + std::string cmdname=(*args)[0]->raw; + if(cmdname == "%include" || cmdname == "%include_s") + { + ret += generate_include(args->strargs(1), ind); + } + else if(cmdname == "%resolve" || cmdname == "%resolve_s") + { + ret += generate_resolve(args->strargs(1), ind); + } + else + ret = args->generate(ind); + return ret; +} + +// TEMPLATE + +// std::string thing::generate(int ind) +// { +// std::string ret; +// return ret; +// } + +// SUBARG + std::string subarg::generate(int ind) +{ + switch(type) + { + case subarg::s_string: + return dynamic_cast(this)->generate(ind); + case subarg::s_arithmetic: + return dynamic_cast(this)->generate(ind); + case subarg::s_subshell: + return dynamic_cast(this)->generate(ind); + } + // doesn't happen, just to get rid of warning + return ""; +} + +std::string subarg_subshell::generate(int ind) { std::string ret; - if(type == subarg::string) + // includes and resolves inside command substitutions + // resolve here and not inside subshell + cmd* cmd = sbsh->single_cmd(); + if( cmd != nullptr && (cmd->firstarg_raw() == "%include" || cmd->firstarg_raw() == "%resolve") ) { - ret += val; + ret += cmd->generate(ind); } - else if(type == subarg::arithmetic) + // regular substitution + else { - ret += "$(("+val+"))"; - } - else if(type == subarg::subshell) - { - // includes and resolves inside command substitutions - // resolve here and not inside subshell - block* cmd = sbsh.single_cmd(); - if( cmd != nullptr && (cmd->args[0].raw == "%include" || cmd->args[0].raw == "%resolve") ) - { - ret += cmd->generate(ind); - } - // regular substitution - else - { - ret += '$'; - ret += sbsh.generate(ind); - } + ret += '$'; + ret += sbsh->generate(ind); } return ret; } diff --git a/src/main.cpp b/src/main.cpp index 241ed04..6365dc3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -12,9 +12,9 @@ #include "parse.hpp" #include "options.hpp" -int execute(block& sh, std::vector& args) +int execute(shmain* sh, std::vector& args) { - std::string data=sh.generate(); + std::string data=sh->generate(); std::string filename=ztd::exec("basename", args[0]).first; filename.pop_back(); @@ -76,10 +76,11 @@ int main(int argc, char* argv[]) return 1; } + // resolve input std::string file; - if(args.size() > 0) + if(args.size() > 0) // argument provided { - if(args[0] == "-") + if(args[0] == "-" || args[0] == "/dev/stdin") //stdin { piped=true; file = "/dev/stdin"; @@ -89,55 +90,63 @@ int main(int argc, char* argv[]) } else { - if(isatty(fileno(stdin))) + if(isatty(fileno(stdin))) // stdin is interactive { print_help(argv[0]); return 1; } - else + else // is piped { piped=true; file = "/dev/stdin"; } } - + // set origin file g_origin=file; add_include(file); + shmain* sh=nullptr; try { - block sh(parse(import_file(file))); + // parse + sh = parse(import_file(file)); + // resolve shebang std::string curbin, binshebang; curbin=ztd::exec("basename", argv[0]).first; - binshebang=ztd::exec("basename", sh.shebang).first; + binshebang=ztd::exec("basename", sh->shebang).first; if(binshebang==curbin) - sh.shebang="#!/bin/sh"; - if(options['e']) + sh->shebang="#!/bin/sh"; + // process + if(options['e']) // force exec { return execute(sh, args); } - else if(options['c']) + else if(options['c']) // force console out { - std::cout << sh.generate(); + std::cout << sh->generate(); } - else if(options['o']) + else if(options['o']) // file output { std::string destfile=options['o']; + // resolve - to stdout if(destfile == "-") destfile = "/dev/stdout"; - std::ofstream(destfile) << sh.generate(); - ztd::exec("chmod", "+x", destfile); + // output + std::ofstream(destfile) << sh->generate(); + // don't chmod on /dev/ + if(destfile.substr(0,5) != "/dev/") + ztd::exec("chmod", "+x", destfile); } - else + else // other process { - if(binshebang == curbin) + if(binshebang == curbin) // exec if shebang is program { return execute(sh, args); } - else + else // output otherwise { - std::cout << sh.generate(); + std::cout << sh->generate(); } } } @@ -151,6 +160,9 @@ int main(int argc, char* argv[]) std::cerr << e.what() << std::endl; return 2; } + + if(sh!=nullptr) + delete sh; return 0; diff --git a/src/parse.cpp b/src/parse.cpp index c9d1bff..6e106bc 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -8,11 +8,42 @@ std::string g_origin; +const char* SPACES=" \t"; +const char* SEPARATORS=" \t\n"; +const char* ARG_END=" \t&|;\n#()"; +const char* SPECIAL_TOKENS="&|;\n#()"; +const char* ALL_TOKENS="&|;\n#(){}"; + +const std::vector reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "until", "do", "done", "{", "}" }; + +std::string g_expecting; + +std::string expecting(std::string const& word) +{ + if(word != "") + return ", expecting '"+word+"'"; + else + return ""; +} + +// basic char utils + inline bool is_in(char c, const char* set) { return strchr(set, c) != NULL; } +bool has_common_char(const char* str1, const char* str2) +{ + uint32_t i=0; + while(str1[i]!=0) + { + if(is_in(str1[i], str2)) + return true; + } + return false; +} + inline bool is_alphanum(char c) { return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z') || (c >= '0' && c<='9'); @@ -22,6 +53,31 @@ inline bool is_alpha(char c) return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z'); } +bool is_alphanum(std::string const& str) +{ + for(auto it: str) + { + if(! (is_alphanum(it) || it=='_' ) ) + return false; + } + return true; +} + +bool valid_name(std::string const& str) +{ + return (is_alpha(str[0]) || str[0] == '_') && is_alphanum(str); +} + +// string utils + +bool word_is_reserved(std::string const in) +{ + for(auto it: reserved_words) + if(in == it) + return true; + return false; +} + bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL) { uint32_t wordsize=strlen(word); @@ -38,13 +94,13 @@ bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, co return false; } -std::string get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) +std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) { uint32_t i=start; while(i parse_subshell(const char* in, uint32_t size, uint32_t start); +// parse fcts + +std::pair parse_subshell(const char* in, uint32_t size, uint32_t start); // parse an arithmetic // ends at )) // for now, uses subshell parsing then takes raw string value // temporary, to improve -std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start) +std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start) { - subarg ret(subarg::arithmetic); + subarg_arithmetic* ret = new subarg_arithmetic; uint32_t i=start; - auto pp=parse_subshell(in, size, i); - i=pp.second; - if(i >= size || in[i]!=')') - throw ztd::format_error( "Unexpected token ')', expecting '))'", g_origin, in, i ); - ret.val = std::string(in+start, i-start-1); - i++; + try + { + auto pp=parse_subshell(in, size, i); + i=pp.second; + delete pp.first; + if(i >= size || in[i]!=')') + { + throw ztd::format_error( "Unexpected token ')', expecting '))'", g_origin, in, i ); + } + ret->val = std::string(in+start, i-start-1); + i++; + } + catch(ztd::format_error& e) + { + delete ret; + throw e; + } return std::make_pair(ret, i); } @@ -101,106 +170,116 @@ std::pair parse_arithmetic(const char* in, uint32_t size, uint // parse one argument // must start at a read char // ends at either " \t|&;\n()" -std::pair parse_arg(const char* in, uint32_t size, uint32_t start) +std::pair parse_arg(const char* in, uint32_t size, uint32_t start) { - arg ret; + arg* ret = new arg; // j : start of subarg uint32_t i=start,j=start,q=start; - if(is_in(in[i], "&|;\n#()")) - throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); - - while(i") && in[i+1]=='&') // special case for <& and >& + + if(is_in(in[i], SPECIAL_TOKENS)) + throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); + + while(i=size) - break; - i++; - } - else if(in[i] == '"') // start double quote - { - q=i; - i++; - while(in[i] != '"') // while inside quoted string + if(i+1") && in[i+1]=='&') // special case for <& and >& { - if(in[i] == '\\') // backslash: don't check next char - { - i+=2; - } - else if( word_eq("$((", in, size, i) ) // arithmetic operation - { - // add previous subarg - ret.sa.push_back(subarg(std::string(in+j, i-j))); - i+=3; - // get arithmetic - auto r=parse_arithmetic(in, size, i); - ret.sa.push_back(r.first); - j = i = r.second; - } - else if( word_eq("$(", in, size, i) ) // substitution - { - // add previous subarg - ret.sa.push_back(subarg(std::string(in+j, i-j))); - i+=2; - // get subshell - auto r=parse_subshell(in, size, i); - ret.sa.push_back(subarg(r.first)); - j = i = r.second; - } - else - i++; - - if(i>=size) - throw ztd::format_error("Unterminated double quote", g_origin, in, q); + i+=2; } - i++; - } - else if(in[i] == '\'') // start single quote - { - q=i; - i++; - while(i=size) + break; + i++; + } + else if(in[i] == '"') // start double quote + { + q=i; + i++; + while(in[i] != '"') // while inside quoted string + { + if(in[i] == '\\') // backslash: don't check next char + { + i+=2; + } + else if( word_eq("$((", in, size, i) ) // arithmetic operation + { + // add previous subarg + ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + i+=3; + // get arithmetic + auto r=parse_arithmetic(in, size, i); + ret->sa.push_back(r.first); + j = i = r.second; + } + else if( word_eq("$(", in, size, i) ) // substitution + { + // add previous subarg + ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + i+=2; + // get subshell + auto r=parse_subshell(in, size, i); + ret->sa.push_back(new subarg_subshell(r.first)); + j = i = r.second; + } + else + i++; + + if(i>=size) + throw ztd::format_error("Unterminated double quote", g_origin, in, q); + } + i++; + } + else if(in[i] == '\'') // start single quote + { + q=i; + i++; + while(i=size) + throw ztd::format_error("Unterminated single quote", g_origin, in, q); + i++; + } + else if( word_eq("$((", in, size, i) ) // arithmetic operation + { + // add previous subarg + ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + i+=3; + // get arithmetic + auto r=parse_arithmetic(in, size, i); + ret->sa.push_back(r.first); + j = i = r.second; + } + else if( word_eq("$(", in, size, i) ) // substitution + { + // add previous subarg + ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + i+=2; + // get subshell + auto r=parse_subshell(in, size, i); + ret->sa.push_back(new subarg_subshell(r.first)); + j = i = r.second; + } + else i++; - if(i>=size) - throw ztd::format_error("Unterminated single quote", g_origin, in, q); - i++; } - else if( word_eq("$((", in, size, i) ) // arithmetic operation - { - // add previous subarg - ret.sa.push_back(subarg(std::string(in+j, i-j))); - i+=3; - // get arithmetic - auto r=parse_arithmetic(in, size, i); - ret.sa.push_back(r.first); - j = i = r.second; - } - else if( word_eq("$(", in, size, i) ) // substitution - { - // add previous subarg - ret.sa.push_back(subarg(std::string(in+j, i-j))); - i+=2; - // get subshell - auto r=parse_subshell(in, size, i); - ret.sa.push_back(subarg(r.first)); - j = i = r.second; - } - else - i++; + + // add string subarg + std::string val=std::string(in+j, i-j); + ret->sa.push_back(new subarg_string(val)); + + // raw string for other uses + ret->raw = std::string(in+start, i-start); + + } + catch(ztd::format_error& e) + { + delete ret; + throw e; } - - // add string subarg - std::string val=std::string(in+j, i-j); - ret.sa.push_back(subarg(val)); - - // raw string for other uses - ret.raw = std::string(in+start, i-start); return std::make_pair(ret, i); } @@ -209,48 +288,72 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t start // must start at a read char // first char has to be read // ends at either &|;\n#() -std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false) +std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false) { uint32_t i=start; - arglist ret; - if(is_in(in[i], "&|;\n#(){}")) + arglist* ret = new arglist; + + try { - if(hard_error) - throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); - else - return std::make_pair(ret, i); + if(is_in(in[i], SPECIAL_TOKENS)) + { + if(hard_error) + throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); + else + return std::make_pair(ret, i); + } + while(iargs.push_back(pp.first); + i = skip_chars(in, size, pp.second, SPACES); + if(i>=size) + return std::make_pair(ret, i); + if(is_in(in[i], SPECIAL_TOKENS) ) + return std::make_pair(ret, i); + } } - while(i=size || is_in(in[i], "&|;\n#()") ) - return std::make_pair(ret, i); + delete ret; + throw e; } return std::make_pair(ret, i); } -std::pair parse_block(const char* in, uint32_t size, uint32_t start); +std::pair parse_block(const char* in, uint32_t size, uint32_t start); // parse a pipeline // must start at a read char // separated by | // ends at either &;\n#) -std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start) +std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start) { uint32_t i=start; - pipeline ret; - while(i=size || is_in(in[i], "&;\n#)") ) || word_eq("||", in, size, i) ) - return std::make_pair(ret, i); - else if( in[i] != '|') - throw ztd::format_error( strf("Unexpected token: '%c'", in[i] ), g_origin, in, i); - i++; + if(in[i] == '!' && i+1negated = true; + i=skip_chars(in, size, i+1, SPACES); + } + while(iadd(pp.first); + i = skip_chars(in, size, pp.second, SPACES); + if( i>=size || is_in(in[i], "&;\n#)") || word_eq("||", in, size, i) ) + return std::make_pair(ret, i); + else if( in[i] != '|') + throw ztd::format_error( strf("Unexpected token: '%c'", in[i] ), g_origin, in, i); + i++; + } + } + catch(ztd::format_error& e) + { + delete ret; + throw e; } return std::make_pair(ret, i); } @@ -259,139 +362,275 @@ std::pair parse_pipeline(const char* in, uint32_t size, uint // must start at a read char // separated by && or || // ends at either ;\n)# -std::pair parse_condlist(const char* in, uint32_t size, uint32_t start) +std::pair parse_condlist(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); - condlist ret; - bool optype=AND_OP; - while(i=size || is_in(in[i], ")#")) // end here exactly: used for control later + bool optype=AND_OP; + while(iadd(pp.first, optype); + i = pp.second; + if(i>=size || is_in(in[i], ")#")) // end here exactly: used for control later + { + return std::make_pair(ret, i); + } + else if(is_in(in[i], ";\n")) // end one char after: skip them for next parse + { + return std::make_pair(ret, i+1); + } + else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after + { + ret->parallel=true; + i++; + return std::make_pair(ret, i); + } + else if( word_eq("&&", in, size, i) ) // and op + { + i += 2; + optype=AND_OP; + } + else if( word_eq("||", in, size, i) ) // or op + { + i += 2; + optype=OR_OP; + } + else if(i=size) + throw ztd::format_error( "Unexpected end of file", g_origin, in, i ); } - else if(is_in(in[i], ";\n")) // end one char after: skip them for next parse - { - i++; - return std::make_pair(ret, i); - } - else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after - { - ret.parallel=true; - i++; - return std::make_pair(ret, i); - } - else if( word_eq("&&", in, size, i) ) // and op - { - i += 2; - optype=AND_OP; - } - else if( word_eq("||", in, size, i) ) // or op - { - i += 2; - optype=OR_OP; - } - else if(i=size) - throw ztd::format_error( "Unexpected end of file", g_origin, in, i ); + } + catch(ztd::format_error& e) + { + delete ret; + throw e; } return std::make_pair(ret, i); } +std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c) +{ + std::vector ret; + uint32_t i=skip_unread(in, size, start); + try + { + while(end_c == 0 || in[i] != end_c) + { + auto pp=parse_condlist(in, size, i); + ret.push_back(pp.first); + i = skip_unread(in, size, pp.second); + if(i>=size) + { + if(end_c != 0) + throw ztd::format_error(strf("Expecting '%c'", end_c), g_origin, in, start-1); + else + break; + } + } + } + catch(ztd::format_error& e) + { + for(auto it: ret) + delete it; + throw e; + } + return std::make_pair(ret, i); +} + +std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word) +{ + std::vector ret; + uint32_t i=skip_unread(in, size, start); + try + { + std::string old_expect=g_expecting; + g_expecting=end_word; + while(true) + { + // check word + auto wp=get_word(in, size, i, ARG_END); + if(wp.first == end_word) + { + i=wp.second; + break; + } + // do a parse + auto pp=parse_condlist(in, size, i); + ret.push_back(pp.first); + i = skip_unread(in, size, pp.second); + // word wasn't found + if(i>=size) + { + throw ztd::format_error(strf("Expecting '%s'", end_word.c_str()), g_origin, in, start-1); + } + } + g_expecting=old_expect; + } + catch(ztd::format_error& e) + { + for(auto it: ret) + delete it; + throw e; + } + return std::make_pair(ret, i); +} + + +std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words) +{ + std::vector ret; + uint32_t i=skip_unread(in, size, start);; + std::string found_end_word; + try + { + std::string old_expect=g_expecting; + g_expecting=end_words[0]; + bool stop=false; + while(true) + { + // check words + auto wp=get_word(in, size, i, ARG_END); + for(auto it: end_words) + { + if(wp.first == it) + { + found_end_word=it; + i=wp.second; + stop=true; + break; + } + } + if(stop) + break; + // do a parse + auto pp=parse_condlist(in, size, i); + ret.push_back(pp.first); + i = skip_unread(in, size, pp.second); + // word wasn't found + if(i>=size) + { + throw ztd::format_error(strf("Expecting '%s'", end_words[0].c_str()), g_origin, in, start-1); + } + } + g_expecting=old_expect; + } + catch(ztd::format_error& e) + { + for(auto it: ret) + delete it; + throw e; + } + return std::make_tuple(ret, i, found_end_word); +} + // parse a subshell // must start right after the opening ( // ends at ) and nothing else -std::pair parse_subshell(const char* in, uint32_t size, uint32_t start) +std::pair parse_subshell(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); - block ret(block::subshell); - while(in[i] != ')') + subshell* ret = new subshell; + + try { - auto pp=parse_condlist(in, size, i); - ret.cls.push_back(pp.first); - i = skip_unread(in, size, pp.second); - if(i>=size) - throw ztd::format_error("Expecting )", g_origin, in, start-1); + auto pp=parse_list_until(in, size, start, ')'); + ret->cls=pp.first; + i=pp.second; + if(ret->cls.size()<=0) + throw ztd::format_error("Subshell is empty", g_origin, in, start-1); + i++; } - if(ret.cls.size()<=0) - throw ztd::format_error("Subshell is empty", g_origin, in, start-1); - i++; + catch(ztd::format_error& e) + { + delete ret; + throw e; + } + return std::make_pair(ret,i); } + // parse a brace block // must start right after the opening { // ends at } and nothing else -std::pair parse_brace(const char* in, uint32_t size, uint32_t start) +std::pair parse_brace(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); - block ret(block::brace); - while(in[i] != '}') + brace* ret = new brace; + + try { - auto pp=parse_condlist(in, size, i); - ret.cls.push_back(pp.first); - i = skip_unread(in, size, pp.second); - if(i>=size) - throw ztd::format_error("Expecting }", g_origin, in, start-1); - if(is_in(in[i], ")")) - throw ztd::format_error( strf("Unexpected token: '%c'", in[i]) , g_origin, in, i ); + auto pp=parse_list_until(in, size, start, '}'); + ret->cls=pp.first; + i=pp.second; + if(ret->cls.size()<=0) + throw ztd::format_error("Brace block is empty", g_origin, in, start-1); + i++; + } + catch(ztd::format_error& e) + { + delete ret; + throw e; } - if(ret.cls.size()<=0) - throw ztd::format_error("Brace block is empty", g_origin, in, start-1); - i++; return std::make_pair(ret,i); } -// parse a functions +// parse a function // must start right after the () // then parses a brace block -std::pair parse_function(const char* in, uint32_t size, uint32_t start) +std::pair parse_function(const char* in, uint32_t size, uint32_t start) { - block ret(block::function); uint32_t i=start; + function* ret = new function; - i=skip_unread(in, size, i); - if(in[i] != '{') - throw ztd::format_error("Expecting { after ()", g_origin, in, i); + try + { + i=skip_unread(in, size, i); + if(in[i] != '{') + throw ztd::format_error("Expecting { after ()", g_origin, in, i); + i++; - i++; - auto pp = parse_brace(in, size, i); - ret.cls = pp.first.cls; - i=pp.second; + auto pp=parse_list_until(in, size, i, '}'); + if(pp.first.size()<=0) + throw ztd::format_error("Condition is empty", g_origin, in, i); + + ret->cls=pp.first; + i=pp.second; + i++; + } + catch(ztd::format_error& e) + { + delete ret; + throw e; + } return std::make_pair(ret, i); } -std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) +std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) { - block ret(block::cmd); + cmd* ret = new cmd; uint32_t i=start; - // parse first arg and keep it - auto tp=parse_arg(in, size, i); - i=skip_unread(in, size, tp.second); - if(word_eq("()", in, size, i)) // is a function - { - i += 2; - auto pp = parse_function(in, size, i); - // first arg is function name - pp.first.shebang = tp.first.raw; - return pp; - } - else // is a command + try { auto pp=parse_arglist(in, size, start, true); - ret.args = pp.first; + ret->args = pp.first; i = pp.second; } + catch(ztd::format_error& e) + { + delete ret; + throw e; + } return std::make_pair(ret, i); } @@ -399,141 +638,400 @@ std::pair parse_cmd(const char* in, uint32_t size, uint32_t sta // parse a case block // must start right after the case // ends at } and nothing else -std::pair parse_case(const char* in, uint32_t size, uint32_t start) +std::pair parse_case(const char* in, uint32_t size, uint32_t start) { - block ret(block::case_block); - uint32_t i=skip_chars(in, size, start, " \t");; + uint32_t i=skip_chars(in, size, start, SPACES);; + case_block* ret = new case_block; - // get the treated argument - auto pa = parse_arg(in, size, i); - ret.carg = pa.first; - i=skip_unread(in, size, pa.second); - - // must be an 'in' - if(!word_eq("in", in, size, i, " \t\n")) + try { - std::string pp=get_word(in, size, i, " \t\n"); - throw ztd::format_error("Unexpected word: '"+pp+"', expecting 'in' after case", g_origin, in, i); + // get the treated argument + auto pa = parse_arg(in, size, i); + ret->carg = pa.first; + i=skip_unread(in, size, pa.second); + + // must be an 'in' + if(!word_eq("in", in, size, i, SEPARATORS)) + { + std::string pp=get_word(in, size, i, SEPARATORS).first; + throw ztd::format_error("Unexpected word: '"+pp+"', expecting 'in' after case", g_origin, in, i); + } + + i=skip_unread(in, size, i+2); + + // parse all cases + while(icases.push_back( std::pair() ); + // iterator to last element + auto cc = ret->cases.end()-1; + + // toto) + while(true) + { + pa = parse_arg(in, size, i); + cc->first.push_back(pa.first); + if(pa.first->raw == "") + throw ztd::format_error("Empty case value", g_origin, in, i); + i=skip_unread(in, size, pa.second); + if(i>=size) + throw ztd::format_error("Unexpected end of file. Expecting 'esac'", g_origin, in, i); + if(in[i] == ')') + break; + if(in[i] != '|' && is_in(in[i], SPECIAL_TOKENS)) + throw ztd::format_error( strf("Unexpected token '%c', expecting ')'", in[i]), g_origin, in, i ); + i=skip_unread(in, size, i+1); + } + i++; + + while(true) // blocks + { + auto pc = parse_condlist(in, size, i); + cc->second.push_back(pc.first); + i=pc.second; + + if(i+1>=size) + throw ztd::format_error("Expecting ';;'", g_origin, in, i); + if(in[i] == ')') + throw ztd::format_error( strf("Unexpected token '%c', expecting ';;'", in[i]), g_origin, in, i ); + + // end of case: on same line + if(in[i-1] == ';' && in[i] == ';') + { + i++; + break; + } + + // end of case: on new line + i=skip_unread(in, size, i); + if(word_eq(";;", in, size, i)) + { + i+=2; + break; + } + // end of block: ignore missing ;; + if(word_eq("esac", in, size, i)) + break; + + } + i=skip_unread(in, size, i); + } + + // ended before finding esac + if(i>=size) + throw ztd::format_error("Expecting 'esac'", g_origin, in, i); + i+=4; + } + catch(ztd::format_error& e) + { + if(ret != nullptr) delete ret; + throw e; } - i=skip_unread(in, size, i+2); + return std::make_pair(ret, i); +} - // parse all cases - while(i parse_if(const char* in, uint32_t size, uint32_t start) +{ + if_block* ret = new if_block; + uint32_t i=start; + + try { - // toto) - std::pair, list_t> cc; while(true) { - pa = parse_arg(in, size, i); - if(pa.first.raw == "") - throw ztd::format_error("Empty case value", g_origin, in, i); - cc.first.push_back(pa.first); - i=skip_unread(in, size, pa.second); - if(i>=size ) - throw ztd::format_error("Unexpected end of file. Expecting 'esac'", g_origin, in, i); - if(is_in(in[i], "&;\n#(")) - throw ztd::format_error( strf("Unexpected token '%c', expecting ')'", in[i]), g_origin, in, i ); - if(in[i] == ')') - break; - i=skip_unread(in, size, i+1); - } - i++; + std::pair ll; + std::string word; - while(true) // blocks - { - auto pc = parse_condlist(in, size, i); - cc.second.push_back(pc.first); - i=pc.second; - - if(i+1>=size) - throw ztd::format_error("Expecting ';;'", g_origin, in, i); - if(in[i] == ')') - throw ztd::format_error( strf("Unexpected token '%c', expecting ';;'", in[i]), g_origin, in, i ); - - // end of case: on same line - if(in[i-1] == ';' && in[i] == ';') + try { - i++; + auto pp=parse_list_until(in, size, i, "then"); + if(pp.first.size()<=0) + throw ztd::format_error("Condition is empty", g_origin, in, i); + i=pp.second; + ll.first=pp.first; + + auto tp=parse_list_until(in, size, i, {"fi", "elif", "else"}); + if(std::get<0>(tp).size() <= 0) + throw ztd::format_error("if block is empty", g_origin, in, i); + ll.second = std::get<0>(tp); + i=std::get<1>(tp); + word=std::get<2>(tp); + + ret->blocks.push_back(ll); + } + catch(ztd::format_error& e) + { + for(auto it: ll.first) + delete it; + for(auto it: ll.second) + delete it; + throw e; + } + + if(word == "fi") + break; + if(word == "else") + { + auto pp=parse_list_until(in, size, i, "fi"); + if(pp.first.size()<=0) + throw ztd::format_error("else block is empty", g_origin, in, i); + ret->else_cls=pp.first; + i=pp.second; break; } - // end of case: on new line - i=skip_unread(in, size, i); - if(word_eq(";;", in, size, i)) - { - i+=2; - break; - } - // end of block: ignore missing ;; - if(word_eq("esac", in, size, i)) - break; - } - i=skip_unread(in, size, i); - ret.cases.push_back(cc); + + } + catch(ztd::format_error& e) + { + delete ret; + throw e; } - // ended before finding esac - if(i>=size) - throw ztd::format_error("Expecting 'esac'", g_origin, in, i); - i+=4; + return std::make_pair(ret, i); +} + +std::pair parse_for(const char* in, uint32_t size, uint32_t start) +{ + for_block* ret = new for_block; + uint32_t i=skip_chars(in, size, start, SPACES); + + try + { + auto wp = get_word(in, size, i, ARG_END); + + if(!valid_name(wp.first)) + throw ztd::format_error( strf("Bad identifier in for clause: '%s'", wp.first.c_str()), g_origin, in, i ); + ret->varname = wp.first; + i=skip_chars(in, size, wp.second, SPACES); + + // in + wp = get_word(in, size, i, ARG_END); + if(wp.first == "in") + { + i=skip_chars(in, size, wp.second, SPACES); + auto pp = parse_arglist(in, size, i); + ret->iter = pp.first; + i = pp.second; + } + else if(wp.first != "") + throw ztd::format_error( "Expecting 'in' after for", g_origin, in, i ); + + // end of arg list + if(!is_in(in[i], "\n;#")) + throw ztd::format_error( strf("Unexpected token '%c', expecting '\\n' or ';'", in[i]), g_origin, in, i ); + if(in[i] == ';') + i++; + i=skip_unread(in, size, i); + + // do + wp = get_word(in, size, i, ARG_END); + if(wp.first != "do") + throw ztd::format_error( "Expecting 'do', after for", g_origin, in, i); + i=skip_unread(in, size, wp.second); + + // ops + auto lp = parse_list_until(in, size, i, "done"); + ret->ops=lp.first; + i=lp.second; + } + catch(ztd::format_error& e) + { + delete ret; + throw e; + } + + return std::make_pair(ret, i); +} + +std::pair parse_while(const char* in, uint32_t size, uint32_t start) +{ + while_block* ret = new while_block; + uint32_t i=start; + + try + { + // cond + auto pp=parse_list_until(in, size, i, "do"); + if(pp.first.size() <= 0) + throw ztd::format_error("condition is empty", g_origin, in, i); + + ret->cond = pp.first; + i=pp.second; + + // ops + auto lp = parse_list_until(in, size, i, "done"); + if(lp.first.size() <= 0) + throw ztd::format_error("while is empty", g_origin, in, i); + ret->ops=lp.first; + i=lp.second; + } + catch(ztd::format_error& e) + { + delete ret; + throw e; + } + + return std::make_pair(ret, i); +} + +std::pair parse_fct_or_cmd(const char* in, uint32_t size, uint32_t start) +{ + block* ret = nullptr; + uint32_t i=start; + + try + { + // get first word + auto tp=get_word(in, size, start, ARG_END); + + i=skip_unread(in, size, tp.second); + if(word_eq("()", in, size, i)) // is a function + { + if(!valid_name(tp.first)) + throw ztd::format_error( strf("Bad function name: '%s'", tp.first.c_str()), g_origin, in, start ); + + auto pp = parse_function(in, size, i+2); + // first arg is function name + pp.first->name = tp.first; + ret = pp.first; + i = pp.second; + } + else // is a command + { + auto pp = parse_cmd(in, size, start); + ret = pp.first; + i = pp.second; + } + } + catch(ztd::format_error& e) + { + if(ret!=nullptr) delete ret; + throw e; + } return std::make_pair(ret, i); } // detect if brace, subshell, case or other -std::pair parse_block(const char* in, uint32_t size, uint32_t start) +std::pair parse_block(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_chars(in, size, start, " \n\t"); - if(i>=size) - throw ztd::format_error("Unexpected end of file", g_origin, in, i); - std::pair ret; - if(in[i] == '{') // brace block + block* ret = nullptr; + + try { - i++; - ret = parse_brace(in, size, i); + if(i>=size) + throw ztd::format_error("Unexpected end of file", g_origin, in, i); + if( in[i] == '(' ) //subshell + { + auto pp = parse_subshell(in, size, i+1); + ret = pp.first; + i = pp.second; + } + else + { + auto wp=get_word(in, size, i, SEPARATORS); + std::string word=wp.first; + if( word == "{" ) // brace block + { + auto pp = parse_brace(in, size, wp.second); + ret = pp.first; + i = pp.second; + } + else if(word == "case") // case + { + auto pp = parse_case(in, size, wp.second); + ret = pp.first; + i = pp.second; + } + else if( word == "if" ) // if + { + auto pp=parse_if(in, size, wp.second); + ret = pp.first; + i = pp.second; + } + else if( word == "for" ) + { + auto pp=parse_for(in, size, wp.second); + ret = pp.first; + i = pp.second; + } + else if( word == "while" ) + { + auto pp=parse_while(in, size, wp.second); + ret = pp.first; + i = pp.second; + } + else if( word == "until") + { + auto pp=parse_while(in, size, wp.second); + pp.first->real_condition()->negate(); + ret = pp.first; + i = pp.second; + } + else if(word_is_reserved(word)) + { + throw ztd::format_error( "Unexpected '"+word+"'" + expecting(g_expecting) , g_origin, in, i); + } + else // other: command/function + { + auto pp = parse_fct_or_cmd(in, size, i); + ret = pp.first; + i = pp.second; + } + + } + + if(ret->type != block::block_cmd) + { + auto pp=parse_arglist(in, size, i, false); // in case of redirects + if(pp.first->args.size()>0) + { + i = pp.second; + ret->redirs=pp.first; + } + else + { + delete pp.first; + } + } } - else if(in[i] == '(') //subshell + catch(ztd::format_error& e) { - i++; - ret = parse_subshell(in, size, i); + if(ret != nullptr) delete ret; + throw e; } - else if(word_eq("case", in, size, i)) - { - ret = parse_case(in, size, i+4); - } - else // command - { - ret = parse_cmd(in, size, i); - } - if(ret.first.args.args.size()<=0) - { - auto pp=parse_arglist(in, size, ret.second, false); // in case of redirects - ret.second=pp.second; - ret.first.args=pp.first; - } - return ret; + + return std::make_pair(ret,i); } // parse main -block parse(const char* in, uint32_t size) +shmain* parse(const char* in, uint32_t size) { - block ret(block::main); + shmain* ret = new shmain(); uint32_t i=0; - // get shebang - if(word_eq("#!", in, size, 0)) + try { - i=skip_until(in, size, 0, "\n"); - ret.shebang=std::string(in, i); + // get shebang + if(word_eq("#!", in, size, 0)) + { + i=skip_until(in, size, 0, "\n"); + ret->shebang=std::string(in, i); + } + i = skip_unread(in, size, i); + // parse all commands + auto pp=parse_list_until(in, size, i, 0); + ret->cls=pp.first; + i=pp.second; } - i = skip_unread(in, size, i); - // parse all commands - while(i -block make_cmd(std::vector args) +const std::string cmd::empty_string=""; + +cmd* make_cmd(std::vector args) { - block cmd(block::cmd); + cmd* ret = new cmd(); + ret->args = new arglist(); for(auto it: args) { - cmd.args.add(arg(it)); + ret->args->add(new arg(it)); } - return cmd; + return ret; } std::vector arglist::strargs(uint32_t start) { std::vector ret; for(uint32_t i=start; iraw); return ret; } void arg::setstring(std::string const& str) { + for(auto it: sa) + delete it; sa.resize(0); - sa.push_back(subarg(str)); + sa.push_back(new subarg_string(str)); raw = str; } -void condlist::add(pipeline const& pl, bool or_op) +std::string arg::string() +{ + if(sa.size() > 1 || sa[0]->type != subarg::s_string) + return ""; + return sa[0]->generate(0); +} + +void condlist::add(pipeline* pl, bool or_op) { if(this->pls.size() > 0) this->or_ops.push_back(or_op); this->pls.push_back(pl); } -block* block::single_cmd() +cmd* block::single_cmd() { - if(this->type == block::subshell) + if(this->type == block::block_subshell) { - if( cls.size() == 1 && // only one condlist - cls[0].pls.size() == 1 && // only one pipeline - cls[0].pls[0].cmds.size() == 1 && // only one block - cls[0].pls[0].cmds[0].type == block::cmd) // block is a command - return &(cls[0].pls[0].cmds[0]); // return command + return dynamic_cast(this)->single_cmd(); + } + if(this->type == block::block_brace) + { + return dynamic_cast(this)->single_cmd(); } return nullptr; } -std::string arg::string() +cmd* subshell::single_cmd() { - if(sa.size() > 1 || sa[0].type != subarg::string) - return ""; - return sa[0].val; + if( cls.size() == 1 && // only one condlist + cls[0]->pls.size() == 1 && // only one pipeline + cls[0]->pls[0]->cmds.size() == 1 && // only one block + cls[0]->pls[0]->cmds[0]->type == block::block_cmd) // block is a command + return dynamic_cast(cls[0]->pls[0]->cmds[0]); // return command + return nullptr; +} + +cmd* brace::single_cmd() +{ + if( cls.size() == 1 && // only one condlist + cls[0]->pls.size() == 1 && // only one pipeline + cls[0]->pls[0]->cmds.size() == 1 && // only one block + cls[0]->pls[0]->cmds[0]->type == block::block_cmd) // block is a command + return dynamic_cast(cls[0]->pls[0]->cmds[0]); // return command + return nullptr; +} + +void condlist::negate() +{ + // invert commands + for(uint32_t i=0; inegated = !pls[i]->negated; + // invert bool operators + for(uint32_t i=0; isize()>0) + return args->args[0]->raw; + return cmd::empty_string; } diff --git a/src/util.cpp b/src/util.cpp index 88d9a48..bebe657 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -4,6 +4,8 @@ #include #include +#include + #include #include @@ -120,19 +122,19 @@ std::string stringReplace(std::string subject, const std::string& search, const return subject; } -void printFormatError(ztd::format_error const& e, bool print_line) -{ - printErrorIndex(e.data(), e.where(), e.what(), e.origin(), print_line); -} - std::string repeatString(std::string const& str, uint32_t n) { std::string ret; for(uint32_t i=0; i