diff --git a/include/minimize.hpp b/include/minimize.hpp new file mode 100644 index 0000000..c6ebded --- /dev/null +++ b/include/minimize.hpp @@ -0,0 +1,26 @@ +#ifndef MINIMIZE_HPP +#define MINIMIZE_HPP + +#include "struc.hpp" + +#include +#include + +extern std::regex re_var_exclude; +extern std::regex re_fct_exclude; + +#define RESERVED_VARIABLES "HOME", "PATH", "SHELL", "PWD" + +std::regex var_exclude_regex(std::string const& in); +std::regex fct_exclude_regex(std::string const& in); + +void list_vars(_obj* in, std::regex exclude); +void list_fcts(_obj* in, std::regex exclude); +void list_cmds(_obj* in, std::regex exclude); + +void minimize_var(_obj* in, std::regex exclude); +void minimize_fct(_obj* in, std::regex exclude); + +void delete_unused_fct(_obj* in, std::regex exclude); + +#endif //MINIMIZE_HPP diff --git a/include/options.hpp b/include/options.hpp index 012207a..0566109 100644 --- a/include/options.hpp +++ b/include/options.hpp @@ -6,8 +6,11 @@ extern ztd::option_set options; extern bool opt_minimize; -extern bool piped; // for cd in substitutions +extern bool g_cd; +extern bool g_include; +extern bool g_resolve; +void get_opts(); ztd::option_set gen_options(); void print_help(const char* arg0); @@ -18,10 +21,8 @@ void print_resolve_help(); /** %include [options] options: - -s single quote contents - -d double quote contents + -C Don't cd -e escape chars. For double quotes - -r include raw text, don't parse. Don't count as included -f include even if already included. Don't count as included */ ztd::option_set create_include_opts(); @@ -29,8 +30,8 @@ ztd::option_set create_include_opts(); /** %resolve [options] options: + -C Don't cd -e escape chars. For double quotes - -p parse as shell code -f ignore non zero return values */ ztd::option_set create_resolve_opts(); diff --git a/include/parse.hpp b/include/parse.hpp index f58aad9..1ceee16 100644 --- a/include/parse.hpp +++ b/include/parse.hpp @@ -10,6 +10,8 @@ #define SPACES " \t" #define SEPARATORS " \t\n" #define ARG_END " \t\n;#()&|" +#define VARNAME_END " \t\n;#()&|=\"'\\{}" +#define BLOCK_TOKEN_END " \t\n;#()&|=\"'\\" #define COMMAND_SEPARATOR "\n;" #define CONTROL_END "#)" #define PIPELINE_END "\n;#()&" @@ -17,11 +19,12 @@ #define SPECIAL_TOKENS "\n;#()&|" #define ALL_TOKENS "\n;#()&|{}" -extern std::string g_origin; +#define SPECIAL_VARS "!#*@$?" std::string import_file(std::string const& path); -shmain* parse(const char* in, uint32_t size); -inline shmain* parse(std::string const& in) { return parse(in.c_str(), in.size()); } +shmain* parse_text(const char* in, uint32_t size, std::string const& filename=""); +inline shmain* parse_text(std::string const& in, std::string const& filename="") { return parse_text(in.c_str(), in.size(), filename); } +inline shmain* parse(std::string const& file) { return parse_text(import_file(file), file); } #endif //PARSE_HPP diff --git a/include/recursive.hpp b/include/recursive.hpp new file mode 100644 index 0000000..db2cc53 --- /dev/null +++ b/include/recursive.hpp @@ -0,0 +1,161 @@ +#ifndef RECURSIVE_HPP +#define RECURSIVE_HPP + +#include + +#include "struc.hpp" + +// boolean value of fct: if true, recurse on this object, if false, skip this object +template +void recurse(void (&fct)(_obj*, Args...), _obj* o, Args... args) +{ + if(o == nullptr) + return; + + // execution + fct(o, args...); + + // recursive calls + switch(o->type) + { + case _obj::_arg : + { + arg* t = dynamic_cast(o); + for(auto it: t->sa) + { + recurse(fct, it, args...); + } + break; + } + case _obj::_arglist : + { + arglist* t = dynamic_cast(o); + for(auto it: t->args) + { + recurse(fct, it, args...); + } + break; + } + case _obj::_pipeline : + { + pipeline* t = dynamic_cast(o); + for(auto it: t->cmds) + { + recurse(fct, it, args...); + } + break; + } + case _obj::_condlist : + { + condlist* t = dynamic_cast(o); + for(auto it: t->pls) + { + recurse(fct, it, args...); + } + break; + } + case _obj::_list : + { + list* t = dynamic_cast(o); + for(auto it: t->cls) + { + recurse(fct, it, args...); + } + break; + } + case _obj::block_subshell : + { + subshell* t = dynamic_cast(o); + recurse(fct, t->lst, args...); + + break; + } + case _obj::block_brace : + { + brace* t = dynamic_cast(o); + recurse(fct, t->lst, args...); + break; + } + case _obj::block_main : + { + shmain* t = dynamic_cast(o); + recurse(fct, t->lst, args...); + + break; + } + case _obj::block_function : + { + function* t = dynamic_cast(o); + recurse(fct, t->lst, args...); + break; + } + case _obj::block_cmd : + { + cmd* t = dynamic_cast(o); + recurse(fct, t->args, args...); + for(auto it: t->var_assigns) + recurse(fct, it.second, args...); + break; + } + case _obj::block_case : + { + case_block* t = dynamic_cast(o); + // carg + recurse(fct, t->carg, args...); + // cases + for(auto sc: t->cases) + { + for(auto it: sc.first) + { + recurse(fct, it, args...); + } + recurse(fct, sc.second, args...); + } + break; + } + case _obj::block_if : + { + if_block* t = dynamic_cast(o); + // ifs + for(auto sc: t->blocks) + { + // condition + recurse(fct, sc.first, args...); + // execution + recurse(fct, sc.second, args...); + } + // else + recurse(fct, t->else_lst, args...); + break; + } + case _obj::block_for : + { + for_block* t = dynamic_cast(o); + // iterations + recurse(fct, t->iter, args...); + // for block + recurse(fct, t->ops, args...); + break; + } + case _obj::block_while : + { + while_block* t = dynamic_cast(o); + // condition + recurse(fct, t->cond, args...); + // operations + recurse(fct, t->ops, args...); + break; + } + case _obj::subarg_subshell : + { + subshell_subarg* t = dynamic_cast(o); + recurse(fct, t->sbsh, args...); + break; + } + + default: break; //do nothing + } +} + + +#endif //RECURSIVE_HPP diff --git a/include/resolve.hpp b/include/resolve.hpp new file mode 100644 index 0000000..eaa16ee --- /dev/null +++ b/include/resolve.hpp @@ -0,0 +1,8 @@ +#ifndef RESOLVE_HPP +#define RESOLVE_HPP + +#include "struc.hpp" + +void resolve(shmain* sh); + +#endif //RESOLVE_HPP diff --git a/include/struc.hpp b/include/struc.hpp index eb2145f..b6bacc3 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -10,32 +10,32 @@ /* structure: -list_t : condlist[] -arglist_t : arg[] block: can be one of - main string (shebang) - list_t (commands) + list (commands) - brace - list_t + list - subshell - list_t + list - cmd: arglist - case - arg (input) - pair[] (cases) + arg (input) + pair[] (cases) - if - pair[] (blocks) - list_t (else) + pair[] (blocks) + list (else) - for string (variable name) arglist (iterations) - list_t (execution) + list (execution) - while - list_t (condition) - list_t (execution) + list (condition) + list (execution) +list: + condlist[] condlist: pipeline[] @@ -49,8 +49,7 @@ pipeline: arglist: arg[] -arg: has - raw +arg: subarg[] can have multiple subarguments if string and subshells subarg: can be one of @@ -71,56 +70,71 @@ class subarg; class cmd; // type pack of condlist -typedef std::vector list_t; typedef std::vector arglist_t; +extern std::string g_origin; + cmd* make_cmd(std::vector args); bool add_include(std::string const& file); -// meta subarg type -class subarg +class _obj { public: - // type - enum argtype { s_string, s_subshell, s_arithmetic }; - argtype type; + enum _objtype { + subarg_string, subarg_variable, subarg_subshell, subarg_arithmetic, + _arg, + _arglist, + _pipeline, + _condlist, + _list, + block_subshell, block_brace, block_main, block_cmd, block_function, block_case, block_if, block_for, block_while, block_until }; + _objtype type; - virtual ~subarg() {;} - - std::string generate(int ind); + virtual ~_obj() {;} + virtual std::string generate(int ind)=0; }; -class arg +// meta subarg type +class subarg : public _obj { public: - arg() { ; } - arg(std::string const& str) {this->setstring(str);} + virtual ~subarg() {;} + virtual std::string generate(int ind)=0; +}; + + +class arg : public _obj +{ +public: + arg() { type=_obj::_arg; } + arg(std::string const& str) { type=_obj::_arg; this->setstring(str);} ~arg() { for(auto it: sa) delete it; } void setstring(std::string const& str); - // has to be set manually - std::string raw; - std::vector sa; // return if is a string and only one subarg std::string string(); + bool equals(std::string const& in) { return this->string() == in; } std::string generate(int ind); }; +inline bool operator==(arg a, std::string const& b) { return a.equals(b); } + // arglist -class arglist +class arglist : public _obj { public: + arglist() { type=_obj::_arglist; } ~arglist() { for( auto it: args ) delete it; } inline void add(arg* in) { args.push_back(in); } inline void push_back(arg* in) { args.push_back(in); } - arglist_t args; + std::vector args; std::vector strargs(uint32_t start); @@ -130,67 +144,12 @@ public: std::string generate(int ind); }; - -// PL - -class pipeline -{ -public: - pipeline() { negated=false; } - pipeline(block* bl) { cmds.push_back(bl); negated=false; } - inline void add(block* bl) { this->cmds.push_back(bl); } - std::vector cmds; - - bool negated; // negated return value (! at start) - - std::string generate(int ind); -}; - -// CL - -class condlist -{ -public: - condlist() { parallel=false; } - condlist(pipeline const& pl) { parallel=false; this->add(new pipeline(pl));} - condlist(pipeline* pl) { parallel=false; this->add(pl);} - - bool parallel; // & at the end - - void add(pipeline* pl, bool or_op=false); - // don't push_back here, use add() instead - std::vector pls; - std::vector or_ops; // size of 1 less than pls, defines separator between pipelines - - void negate(); - - std::string generate(int ind, bool pre_indent=true); -}; - -// class redir -// { -// public: -// enum redirtype { none, write, append, read, raw } ; -// redir(redirtype in=none) { type=in; } -// redirtype type; -// arg val; -// }; - - -class cmd; - // Meta block -class block +class block : public _obj { public: - // type - enum blocktype { block_subshell, block_brace, block_main, block_cmd, block_function, block_case, block_if, block_for, block_while, block_until }; - blocktype type; - - // ctor block() { redirs=nullptr; } virtual ~block() { if(redirs!=nullptr) delete redirs; } - // cmd arglist* redirs; @@ -202,31 +161,95 @@ public: virtual std::string generate(int ind)=0; }; -// block types +// PL -class subshell : public block +class pipeline : public _obj { public: - subshell() { type=block::block_subshell; } - ~subshell() { for(auto it: cls) delete it; } + pipeline(block* bl=nullptr) { type=_obj::_pipeline; if(bl!=nullptr) cmds.push_back(bl); negated=false; } + ~pipeline() { for(auto it: cmds) delete it; } + inline void add(block* bl) { this->cmds.push_back(bl); } + std::vector cmds; - cmd* single_cmd(); - - list_t cls; + bool negated; // negated return value (! at start) std::string generate(int ind); }; -class brace : public block + +// CL + +class condlist : public _obj { public: - brace() { type=block::block_brace; } - ~brace() { - if(redirs!=nullptr) delete redirs; - for(auto it: cls) delete it; } + condlist(pipeline* pl=nullptr) { type=_obj::_condlist; parallel=false; if(pl!=nullptr) this->add(pl); } + ~condlist() { for(auto it: pls) delete it; } - cmd* single_cmd(); + bool parallel; // & at the end - list_t cls; + void add(pipeline* pl, bool or_op=false); + // don't push_back here, use add() instead + std::vector pls; + std::vector or_ops; // size of 1 less than pls, defines separator between pipelines + + void prune_first_cmd(); + + block* first_block(); + cmd* first_cmd(); + cmd* get_cmd(std::string const& cmdname); + + void negate(); + + std::string generate(int ind); +}; + +class list : public _obj +{ +public: + list() { type=_obj::_list; } + ~list() { for(auto it: cls) delete it; } + + std::vector cls; + + condlist* last_cond() { return cls[cls.size()-1]; } + + size_t size() { return cls.size(); } + condlist* operator[](uint32_t i) { return cls[i]; } + + std::string generate(int ind, bool first_indent); + std::string generate(int ind) { return this->generate(ind, true); } +}; + +// class redir +// { +// public: +// enum redirtype { none, write, append, read, raw } ; +// redir(redirtype in=none) { type=in; } +// redirtype type; +// arg val; +// }; + +// block subtypes // + +class cmd : public block +{ +public: + cmd(arglist* in=nullptr) { type=_obj::block_cmd; args=in; } + ~cmd() { + if(args!=nullptr) delete args; + for(auto it: var_assigns) delete it.second; + } + + static const std::string empty_string; + + std::string const& firstarg_string(); + + // preceding var assigns + std::vector> var_assigns; + + // get var assigns in special cmds (export, unset, read) + std::vector arg_vars(); + + arglist* args; std::string generate(int ind); }; @@ -234,45 +257,63 @@ public: class shmain : public block { public: - shmain() { type=block::block_main; } + shmain(list* in=nullptr) { type=_obj::block_main; lst=in; } ~shmain() { - if(redirs!=nullptr) delete redirs; - for(auto it: cls) delete it; } + if(lst!=nullptr) delete lst; + } + bool is_dev_file() { return filename.substr(0,5) == "/dev/"; } + + void concat(shmain* in); + + std::string filename; std::string shebang; - list_t cls; + list* lst; std::string generate(bool print_shebang=true, int ind=0); std::string generate(int ind); }; +class subshell : public block +{ +public: + subshell(list* in=nullptr) { type=_obj::block_subshell; lst=in; } + ~subshell() { + if(lst!=nullptr) delete lst; + } + + cmd* single_cmd(); + + list* lst; + + std::string generate(int ind); +}; + +class brace : public block +{ +public: + brace(list* in=nullptr) { type=_obj::block_brace; lst=in; } + ~brace() { + if(lst!=nullptr) delete lst; + } + + cmd* single_cmd(); + + list* lst; + + std::string generate(int ind); +}; + class function : public block { public: - function() { type=block::block_function; } + function(list* in=nullptr) { type=_obj::block_function; lst=in; } ~function() { - if(redirs!=nullptr) delete redirs; - for(auto it: cls) delete it; } + if(lst!=nullptr) delete lst; + } std::string name; - list_t cls; - - std::string generate(int ind); -}; - -class cmd : public block -{ -public: - cmd(arglist* in=nullptr) { type=block::block_cmd; args=in; } - ~cmd() { - if(redirs!=nullptr) delete redirs; - if(args!=nullptr) delete args; } - - static const std::string empty_string; - - std::string const& firstarg_raw(); - - arglist* args; + list* lst; std::string generate(int ind); }; @@ -280,21 +321,19 @@ public: class case_block : public block { public: - case_block(arg* in=nullptr) { type=block::block_case; carg=in; } + case_block(arg* in=nullptr) { type=_obj::block_case; carg=in; } ~case_block() { - if(redirs!=nullptr) delete redirs; if(carg!=nullptr) delete carg; for( auto cit : cases ) { for( auto ait : cit.first ) delete ait; - for( auto lit : cit.second ) - delete lit; + if(cit.second != nullptr) delete cit.second; } } arg* carg; - std::vector< std::pair > cases; + std::vector< std::pair, list*> > cases; std::string generate(int ind); }; @@ -302,22 +341,19 @@ public: class if_block : public block { public: - if_block() { type=block::block_if; } + if_block() { type=_obj::block_if; else_lst=nullptr; } ~if_block() { - if(redirs!=nullptr) delete redirs; - for(auto it: else_cls) delete it; for(auto ifb: blocks) { - for(auto it: ifb.first) - delete it; - for(auto it: ifb.second) - delete it; + if(ifb.first!=nullptr) delete ifb.first; + if(ifb.second!=nullptr) delete ifb.second; } + if(else_lst!=nullptr) delete else_lst; } - std::vector< std::pair > blocks; + std::vector< std::pair > blocks; - list_t else_cls; + list* else_lst; std::string generate(int ind); }; @@ -325,17 +361,16 @@ public: class for_block : public block { public: - for_block(std::string const& name="", arglist* args=nullptr) { type=block::block_for; varname=name; iter=args; } + for_block(std::string const& name="", arglist* args=nullptr, list* lst=nullptr) { type=_obj::block_for; varname=name; iter=args; ops=lst; } ~for_block() { - if(redirs!=nullptr) delete redirs; if(iter!=nullptr) delete iter; - for(auto it: ops) delete it; + if(ops!=nullptr) delete ops; } std::string varname; arglist* iter; - list_t ops; + list* ops; std::string generate(int ind); }; @@ -343,51 +378,63 @@ public: class while_block : public block { public: - while_block() { type=block::block_while; } + while_block(list* a=nullptr, list* b=nullptr) { type=_obj::block_while; cond=a; ops=b; } ~while_block() { - if(redirs!=nullptr) delete redirs; - for(auto it: cond) delete it; - for(auto it: ops) delete it; + if(cond!=nullptr) delete cond; + if(ops!=nullptr) delete ops; } - condlist* real_condition() { return *(cond.end()-1); } + condlist* real_condition() { return cond->last_cond(); } - list_t cond; - list_t ops; + list* cond; + list* ops; std::string generate(int ind); }; -// Subarg subtypes +// Subarg subtypes // -class subarg_string : public subarg +class string_subarg : public subarg { public: - subarg_string(std::string const& in="") { type=subarg::s_string; val=in; } + string_subarg(std::string const& in="") { type=_obj::subarg_string; val=in; } + ~string_subarg() {;} std::string val; std::string generate(int ind) { return val; } }; -class subarg_arithmetic : public subarg +class variable_subarg : public subarg { public: - subarg_arithmetic() { type=subarg::s_arithmetic; } + variable_subarg(std::string const& in="") { type=_obj::subarg_variable; varname=in; } + ~variable_subarg() {;} + + std::string varname; + + std::string generate(int ind) { return "$" + varname; } +}; + +class arithmetic_subarg : public subarg +{ +public: + arithmetic_subarg() { type=_obj::subarg_arithmetic; } + ~arithmetic_subarg() {;} std::string val; std::string generate(int ind) { return "$(("+val+"))"; } }; -class subarg_subshell : public subarg +class subshell_subarg : public subarg { public: - subarg_subshell(subshell* in=nullptr) { type=subarg::s_subshell; sbsh=in; } - subarg_subshell(subshell in) { type=subarg::s_subshell; sbsh=new subshell(in); } - ~subarg_subshell() { if(sbsh != nullptr) delete sbsh;} + subshell_subarg(subshell* in=nullptr, bool inq=false) { type=_obj::subarg_subshell; sbsh=in; quoted=inq; } + ~subshell_subarg() { if(sbsh != nullptr) delete sbsh; } subshell* sbsh; + bool quoted; std::string generate(int ind); }; diff --git a/include/util.hpp b/include/util.hpp index 8f7dd27..3d79be2 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -6,15 +6,21 @@ #include #include #include +#include +#include +#include +#include +#include #include -#define INDENT indent(ind) +#include "struc.hpp" extern std::string indenting_string; std::string indent(int n); +std::vector split(std::string const& in, const char* splitters); std::vector split(std::string const& in, char c); std::string escape_str(std::string const& in); @@ -30,9 +36,81 @@ std::string strf( const std::string& format, Args ... args ) return std::string( buf.get(), buf.get() + size - 1 ); // We don't want the '\0' inside } + +template +std::vector> sort_by_value(std::map const& in) +{ + typedef std::pair pair_t; + // create an empty vector of pairs + std::vector ret; + + // copy key-value pairs from the map to the vector + std::copy(in.begin(), + in.end(), + std::back_inserter>(ret)); + + // sort the vector by increasing order of its pair's second value + // if second value are equal, order by the pair's first value + std::sort(ret.begin(), ret.end(), + [](const pair_t& l, const pair_t& r) { + if (l.second != r.second) + return l.second > r.second; + return l.first > r.first; + }); + return ret; +} + +inline bool is_in(char c, const char* set) { + return strchr(set, c) != NULL; +} + +template +std::set prune_matching(std::map& in, std::regex re) +{ + std::set ret; + auto it=in.begin(); + auto prev=in.end(); + while(it!=in.end()) + { + if( std::regex_match(it->first, re) ) + { + ret.insert(it->first); + in.erase(it); + if(prev == in.end()) + it = in.begin(); + else + { + it = prev; + it++; + } + } + else + { + prev=it; + it++; + } + } + return ret; +} + +template +std::set map_to_set(std::map in) +{ + std::set ret; + for(auto it: in) + { + ret.insert(it.first); + } + return ret; +} + +void concat_sets(std::set& a, std::set const& b); + +std::set prune_matching(std::set& in, std::regex re); + std::string delete_brackets(std::string const& in); -std::string pwd(); +std::string concatargs(std::vector const& args); int _exec(std::string const& bin, std::vector const& args); @@ -41,4 +119,6 @@ std::string stringReplace(std::string subject, const std::string& search, const void printFormatError(ztd::format_error const& e, bool print_line=true); void printErrorIndex(const char* in, const int index, const std::string& message, const std::string& origin, bool print_line=true); +int execute(shmain* sh, std::vector& args); + #endif //UTIL_HPP diff --git a/src/generate.cpp b/src/generate.cpp index f210ef8..4c35616 100644 --- a/src/generate.cpp +++ b/src/generate.cpp @@ -6,8 +6,6 @@ #include "options.hpp" #include "parse.hpp" -#include - std::vector included; bool is_sub_special_cmd(std::string in) @@ -67,13 +65,11 @@ std::string pipeline::generate(int ind) return ret; } -std::string condlist::generate(int ind, bool pre_indent) +std::string condlist::generate(int ind) { std::string ret; if(pls.size() <= 0) return ""; - if(pre_indent) - ret += indented("", ind); ret += pls[0]->generate(ind); for(uint32_t i=0 ; igenerate(ind), ind); + } + else + { + first_indent=true; + ret += cls[i]->generate(ind); + } + } + return ret; +} + bool add_include(std::string const& file) { std::string truepath=ztd::exec("readlink", "-f", file).first; @@ -106,155 +123,6 @@ bool add_include(std::string const& file) return true; } -std::string concatargs(std::vector args) -{ - std::string ret; - for(auto it: args) - ret += it + ' '; - ret.pop_back(); - return ret; -} - -std::string generate_resolve(std::vector args, int ind) -{ - std::string ret; - - auto opts=create_resolve_opts(); - auto rargs = opts.process(args, false, true, false); - - std::string cmd=concatargs(rargs); - std::string dir; - - if(!opts['C'] && !piped) - { - dir=pwd(); - std::string cddir=ztd::exec("dirname", g_origin).first; - cddir.pop_back(); - if(chdir(cddir.c_str()) != 0) - throw std::runtime_error("Cannot cd to '"+cddir+"'"); - } - - // exec call - auto p=ztd::shp("exec "+cmd); - - if(!opts['f'] && p.second!=0) - { - throw std::runtime_error( strf("command `%s` returned %u", cmd.c_str(), p.second) ); - } - while(p.first[p.first.size()-1] == '\n') - p.first.pop_back(); - - if(opts['p']) - { - shmain* sh; - try - { - sh = parse(p.first); - } - catch(ztd::format_error& e) - { - throw ztd::format_error(e.what(), "command `"+cmd+'`', e.data(), e.where()); - } - ret = sh->generate(false, ind); - delete sh; - ret = ret.substr(indent(ind).size()); - if(ret[ret.size()-1] != '\n') - ret += '\n'; - } - else - { - ret = p.first; - } - - if(!opts['C'] && !piped) - if(chdir(dir.c_str()) != 0) - throw std::runtime_error("Cannot cd to '"+dir+"'"); - - return ret; -} - -std::string generate_include(std::vector args, int ind) -{ - std::string ret; - - auto opts=create_include_opts(); - auto rargs = opts.process(args, false, true, false); - - std::string quote; - if(opts['s']) - quote = '\''; - else if(opts['d']) - quote = '"'; - - std::string curfile=g_origin; - std::string dir; - - if(!opts['C'] && !piped) - { - dir=pwd(); - std::string cddir=ztd::exec("dirname", curfile).first; - cddir.pop_back(); - if(chdir(cddir.c_str()) != 0) - throw std::runtime_error("Cannot cd to '"+cddir+"'"); - } - - // do shell resolution - std::string command="for I in "; - for(auto it: rargs) - command += it + ' '; - command += "; do echo $I ; done"; - std::string inc=ztd::sh(command); - - auto v = split(inc, '\n'); - - std::string file; - shmain* bl=nullptr; - bool indent_remove=true; - - for(auto it : v) - { - if( opts['f'] || // force include - add_include(it) ) // not already included - { - file=import_file(it); - if(opts['d']) - file = stringReplace(file, "\"", "\\\""); - if(opts['s']) - file = stringReplace(file, "'", "'\\''"); - if(opts['r']) - ret += file; - else - { - g_origin=it; - try - { - bl = parse(quote + file + quote); - } - catch(ztd::format_error& e) - { - throw ztd::format_error(e.what(), it, e.data(), e.where()); - } - file = bl->generate(false, ind); - if(file[file.size()-1] != '\n') - file += '\n'; - delete bl; - if(indent_remove) - { - indent_remove=false; - file = file.substr(indent(ind).size()); - } - ret += file; - } - } - } - if(!opts['C'] && !piped) - if(chdir(dir.c_str()) != 0) - throw std::runtime_error("Cannot cd to '"+dir+"'"); - g_origin=curfile; - - return ret; -} - // BLOCK std::string block::generate_redirs(int ind) @@ -280,26 +148,24 @@ std::string if_block::generate(int ind) { // condition if(i==0) - ret += "if "; + ret += "if"; else - ret += "elif "; - // first cmd: on same line with no indent - ret += blocks[i].first[0]->generate(ind+1, false); - // other cmds: on new lines - for(uint32_t j=1; jgenerate(ind+1); + ret += "elif"; + + if(blocks[i].first->size()==1) + ret += ' ' + blocks[i].first->generate(ind+1, false); + else + ret += '\n' + blocks[i].first->generate(ind+1); // execution ret += indented("then\n", ind); - for(auto it: blocks[i].second) - ret += it->generate(ind+1); + ret += blocks[i].second->generate(ind+1); } - if(else_cls.size()>0) + if(else_lst!=nullptr) { ret += indented("else\n", ind); - for(auto it: else_cls) - ret += it->generate(ind+1); + ret += else_lst->generate(ind+1); } ret += indented("fi", ind); @@ -315,8 +181,7 @@ std::string for_block::generate(int ind) ret += " in " + iter->generate(ind); ret += '\n'; ret += indented("do\n", ind); - for(auto it: ops) - ret += it->generate(ind+1); + ret += ops->generate(ind+1); ret += indented("done", ind); return ret; @@ -327,19 +192,13 @@ std::string while_block::generate(int ind) std::string ret; ret += "while"; - if(cond.size() == 1) - { - ret += " " + cond[0]->generate(ind+1, false); - } + if(cond->size() == 1) + ret += " " + cond->generate(ind+1, false); else - { - ret += '\n'; - for(auto it: cond) - ret += it->generate(ind+1); - } + ret += '\n' + cond->generate(ind+1); + ret += indented("do\n", ind); - for(auto it: ops) - ret += it->generate(ind+1); + ret += ops->generate(ind+1); ret += indented("done", ind); return ret; @@ -352,8 +211,7 @@ std::string subshell::generate(int ind) ret += '('; if(!opt_minimize) ret += '\n'; // commands - for(auto it: cls) - ret += it->generate(ind+1); + ret += lst->generate(ind+1); if(opt_minimize && ret.size()>1) ret.pop_back(); // ) can be right after command // close subshell @@ -373,8 +231,7 @@ std::string shmain::generate(bool print_shebang, int ind) std::string ret; if(print_shebang && shebang!="") ret += shebang + '\n'; - for(auto it: cls) - ret += it->generate(ind); + ret += lst->generate(ind); if( opt_minimize && ret[ret.size()-1] == '\n') ret.pop_back(); return ret; @@ -383,10 +240,9 @@ std::string shmain::generate(bool print_shebang, int ind) std::string brace::generate(int ind) { std::string ret; - ret += "{\n" ; - for(auto it: cls) - ret += it->generate(ind+1); + ret += "{\n" ; + ret += lst->generate(ind+1); ret += indented("}", ind); ret += generate_redirs(ind); @@ -399,11 +255,10 @@ std::string function::generate(int ind) std::string ret; // function definition ret += name + "()"; - if(!opt_minimize) ret += '\n' + indent(ind); - ret += "{\n"; + if(!opt_minimize) ret += '\n'; // commands - for(auto it: cls) - ret += it->generate(ind+1); + ret += indented("{\n", ind); + ret += lst->generate(ind+1); ret += indented("}", ind); ret += generate_redirs(ind); @@ -427,15 +282,19 @@ std::string case_block::generate(int ind) ret += ')'; if(!opt_minimize) ret += '\n'; // commands - for(auto it: cs.second) - ret += it->generate(ind+1); + ret += cs.second->generate(ind+1); // end of case: ;; if(opt_minimize && ret[ret.size()-1] == '\n') // ;; can be right after command - { ret.pop_back(); - } ret += indented(";;\n", ind+1); } + + // remove ;; from last case + if(opt_minimize) + { + ret.erase(ret.size()-3, 2); + } + // close case ind--; ret += indented("esac", ind); @@ -448,19 +307,32 @@ std::string case_block::generate(int ind) std::string cmd::generate(int ind) { std::string ret; + // var assigns + for(auto it: var_assigns) + ret += it.first + '=' + it.second->generate(ind) + ' '; + if(args==nullptr || args->size()<=0) - return ""; - std::string cmdname=(*args)[0]->raw; - if(cmdname == "%include" || cmdname == "%include_s") { - ret += generate_include(args->strargs(1), ind); + ret.pop_back(); + return ret; } - else if(cmdname == "%resolve" || cmdname == "%resolve_s") - { - ret += generate_resolve(args->strargs(1), ind); - } - else - ret = args->generate(ind); + + // command + ret += args->generate(ind); + // delete potential trailing space + if(ret[ret.size()-1] == ' ') + ret.pop_back(); + + return ret; +} + +// SUBARG + +std::string subshell_subarg::generate(int ind) +{ + std::string ret; + ret += '$'; + ret += sbsh->generate(ind); return ret; } @@ -471,39 +343,3 @@ std::string cmd::generate(int ind) // std::string ret; // return ret; // } - -// SUBARG - -std::string subarg::generate(int ind) -{ - switch(type) - { - case subarg::s_string: - return dynamic_cast(this)->generate(ind); - case subarg::s_arithmetic: - return dynamic_cast(this)->generate(ind); - case subarg::s_subshell: - return dynamic_cast(this)->generate(ind); - } - // doesn't happen, just to get rid of warning - return ""; -} - -std::string subarg_subshell::generate(int ind) -{ - std::string ret; - // includes and resolves inside command substitutions - // resolve here and not inside subshell - cmd* cmd = sbsh->single_cmd(); - if( cmd != nullptr && (cmd->firstarg_raw() == "%include" || cmd->firstarg_raw() == "%resolve") ) - { - ret += cmd->generate(ind); - } - // regular substitution - else - { - ret += '$'; - ret += sbsh->generate(ind); - } - return ret; -} diff --git a/src/main.cpp b/src/main.cpp index 7a58ca2..bcf0752 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,83 +11,54 @@ #include "struc.hpp" #include "parse.hpp" #include "options.hpp" +#include "recursive.hpp" +#include "minimize.hpp" +#include "resolve.hpp" -int execute(shmain* sh, std::vector& args) +void oneshot_opt_process(const char* arg0) { - std::string data=sh->generate(); - - std::string filename=ztd::exec("basename", args[0]).first; - filename.pop_back(); - - // generate path - std::string tmpdir = (getenv("TMPDIR") != NULL) ? getenv("TMPDIR") : "/tmp" ; - std::string dirpath = tmpdir + "/lxsh_" + ztd::sh("tr -dc '[:alnum:]' < /dev/urandom | head -c10"); - std::string filepath = dirpath+'/'+filename; - - // create dir - if(ztd::exec("mkdir", "-p", dirpath).second) - { - throw std::runtime_error("Failed to create directory '"+dirpath+'\''); - } - - // create stream - std::ofstream stream(filepath); - if(!stream) - { - ztd::exec("rm", "-rf", dirpath); - throw std::runtime_error("Failed to write to '"+filepath+'\''); - } - - // output - stream << data; - stream.close(); - if(ztd::exec("chmod", "+x", filepath).second != 0) - { - ztd::exec("rm", "-rf", dirpath); - throw std::runtime_error("Failed to make '"+filepath+"' executable"); - } - - // exec - int retval=_exec(filepath, args); - ztd::exec("rm", "-rf", dirpath); - - return retval; -} - -int main(int argc, char* argv[]) -{ - auto args=options.process(argc, argv, false, true); - - if(options['m']) - opt_minimize=true; - - piped=false; - if(options['h']) { - print_help(argv[0]); - return 1; + print_help(arg0); + exit(1); } if(options["help-commands"]) { print_include_help(); printf("\n\n"); print_resolve_help(); + exit(1); + } +} + +int main(int argc, char* argv[]) +{ + std::vector args; + + int ret=0; + + try + { + args=options.process(argc, argv, false, true); + } + catch(std::exception& e) + { + std::cerr << e.what() << std::endl; return 1; } - // resolve input std::string file; if(args.size() > 0) // argument provided { if(args[0] == "-" || args[0] == "/dev/stdin") //stdin { - piped=true; file = "/dev/stdin"; } else + { file=args[0]; + } } else { @@ -98,35 +69,91 @@ int main(int argc, char* argv[]) } else // is piped { - piped=true; file = "/dev/stdin"; args.push_back("/dev/stdin"); } } - // set origin file - g_origin=file; - add_include(file); + // parsing - shmain* sh=nullptr; + shmain* sh = new shmain(new list); + shmain* tsh = nullptr; try { - // parse - sh = parse(import_file(file)); - // resolve shebang - std::string curbin, binshebang; - curbin=ztd::exec("basename", argv[0]).first; - binshebang=ztd::exec("basename", sh->shebang).first; - if(binshebang==curbin) - sh->shebang="#!/bin/sh"; - // process - if(options['e']) // force exec + bool is_exec = false; + bool first_run = true; + + // do parsing + for(uint32_t i=0 ; ishebang).first; + if(shebang_is_bin) + tsh->shebang="#!/bin/sh"; + + // detect if need execution + if(options['e']) + is_exec=true; + else if(options['c'] || options['o']) + is_exec=false; + else + is_exec = shebang_is_bin; + + if(!is_exec && args.size() > 1) // not exec: parse options on args + { + std::string t=args[0]; + args=options.process(args); + } + + oneshot_opt_process(argv[0]); + get_opts(); + } + + /* mid processing */ + // resolve/include + if(g_include || g_resolve) + resolve(tsh); + + // concatenate to main + sh->concat(tsh); + delete tsh; + tsh = nullptr; + + // is exec: break and exec + if(is_exec) + break; } - else if(options['c']) // force console out + + // processing before output + if(options['m']) + opt_minimize=true; + if(options["minimize-var"]) + minimize_var( sh, re_var_exclude ); + if(options["minimize-fct"]) + minimize_fct( sh, re_fct_exclude ); + if(options["remove-unused"]) + delete_unused_fct( sh, re_fct_exclude ); + + if(options["list-var"]) + list_vars(sh, re_var_exclude); + else if(options["list-fct"]) + list_fcts(sh, re_var_exclude); + else if(options["list-cmd"]) + list_cmds(sh, re_var_exclude); + else if(is_exec) { - std::cout << sh->generate(); + ret = execute(sh, args); } else if(options['o']) // file output { @@ -138,34 +165,31 @@ int main(int argc, char* argv[]) std::ofstream(destfile) << sh->generate(); // don't chmod on /dev/ if(destfile.substr(0,5) != "/dev/") - ztd::exec("chmod", "+x", destfile); + ztd::exec("chmod", "+x", destfile); } - else // other process + else // to console { - if(binshebang == curbin) // exec if shebang is program - { - return execute(sh, args); - } - else // output otherwise - { - std::cout << sh->generate(); - } + std::cout << sh->generate(); } } catch(ztd::format_error& e) { + if(tsh != nullptr) + delete tsh; + delete sh; printFormatError(e); return 100; } catch(std::exception& e) { + if(tsh != nullptr) + delete tsh; + delete sh; std::cerr << e.what() << std::endl; return 2; } - if(sh!=nullptr) - delete sh; + delete sh; - - return 0; + return ret; } diff --git a/src/minimize.cpp b/src/minimize.cpp new file mode 100644 index 0000000..32903fa --- /dev/null +++ b/src/minimize.cpp @@ -0,0 +1,384 @@ +#include "minimize.hpp" + +#include +#include +#include + +#include "recursive.hpp" +#include "parse.hpp" +#include "util.hpp" + +std::regex re_var_exclude; +std::regex re_fct_exclude; + +std::vector get_list(std::string const& in) +{ + return split(in, ", \t\n"); +} + +std::regex gen_regex_from_list(std::vector const& in) +{ + std::string re; + for(auto it: in) + re += '('+it+")|"; + if(re.size()>0) + re.pop_back(); + return std::regex(re); +} + +std::vector gen_var_excludes(std::string const& in) +{ + std::vector ret = {RESERVED_VARIABLES, strf("[0-9%s]", SPECIAL_VARS)}; + auto t = get_list(in); + ret.insert(ret.end(), t.begin(), t.end()); + return ret; +} + +std::regex var_exclude_regex(std::string const& in) +{ + return gen_regex_from_list(gen_var_excludes(in)); +} +std::regex fct_exclude_regex(std::string const& in) +{ + return gen_regex_from_list(get_list(in)); +} + +std::vector cmd::arg_vars() +{ + std::vector ret; + if(args==nullptr || args->size()<=0) + return ret; + std::string cmdname=this->firstarg_string(); + + if(cmdname == "export" || cmdname == "unset" || cmdname == "local") + { + for(uint32_t i=1; isize(); i++) + { + arg* ta = args->args[i]; + if(ta->sa.size() < 1 || ta->sa[0]->type != _obj::subarg_string) + continue; + if(ta->sa.size() == 1) + { + if( std::regex_match(ta->sa[0]->generate(0), std::regex("[a-zA-Z_][0-9a-zA-Z_]*([=](.*)?)?") ) ) + ret.push_back(ta->sa[0]); + } + else if(ta->sa.size() > 1) + { + if( std::regex_match(ta->sa[0]->generate(0), std::regex("[a-zA-Z_][0-9a-zA-Z_]*=.*") ) ) + ret.push_back(ta->sa[0]); + } + } + } + + return ret; +} + +std::string get_varname(subarg* in) +{ + if(in->type != _obj::subarg_string) + return ""; + std::string& t=dynamic_cast(in)->val; + size_t i=t.find('='); + if(i!=std::string::npos) + return t.substr(0, i); + else + return t; +} + +/** VAR RECURSE **/ + +void get_map_varname(_obj* in, std::map* variable_map) +{ + if(variable_map == nullptr) + return; + switch(in->type) + { + case _obj::subarg_variable: { + variable_subarg* t = dynamic_cast(in); + if(!variable_map->insert( std::make_pair(t->varname, 1) ).second) + (*variable_map)[t->varname]++; + }; break; + case _obj::block_for: { + for_block* t = dynamic_cast(in); + if(!variable_map->insert( std::make_pair(t->varname, 1) ).second) + (*variable_map)[t->varname]++; + }; break; + case _obj::block_cmd: { + cmd* t = dynamic_cast(in); + for(auto it: t->var_assigns) + if(!variable_map->insert( std::make_pair(it.first, 1) ).second) + (*variable_map)[it.first]++; + for(auto it: t->arg_vars()) + { + std::string varname=get_varname(it); + if(!variable_map->insert( std::make_pair(varname, 1) ).second) + (*variable_map)[varname]++; + } + }; break; + default: break; + } +} + +void replace_varname(_obj* in, std::map* varmap) +{ + switch(in->type) + { + case _obj::subarg_variable: { + variable_subarg* t = dynamic_cast(in); + auto el=varmap->find(t->varname); + if(el!=varmap->end()) + t->varname = el->second; + }; break; + case _obj::block_for: { + for_block* t = dynamic_cast(in); + auto it=varmap->find(t->varname); + if(it!=varmap->end()) + t->varname = it->second; + }; break; + case _obj::block_cmd: { + cmd* t = dynamic_cast(in); + for(auto it=t->var_assigns.begin() ; it!=t->var_assigns.end() ; it++) + { + auto el=varmap->find(it->first); + if(el!=varmap->end()) + it->first = el->second; + } + for(auto it: t->arg_vars()) + { + string_subarg* t = dynamic_cast(it); + auto el=varmap->find(get_varname(t)); + if(el!=varmap->end()) + { + size_t tpos=t->val.find('='); + if(tpos == std::string::npos) + t->val = el->second; + else + t->val = el->second + t->val.substr(tpos); + } + } + }; break; + default: break; + } +} + +/** FCT RECURSE **/ + +void get_map_cmd(_obj* in, std::map* all_cmds) +{ + if(all_cmds == nullptr) + return; + + switch(in->type) + { + case _obj::block_cmd: { + cmd* t = dynamic_cast(in); + std::string cmdname = t->firstarg_string(); + if(cmdname != "" && !all_cmds->insert( std::make_pair(cmdname, 1) ).second) + (*all_cmds)[cmdname]++; + }; break; + default: break; + } +} + +void get_map_fctname(_obj* in, std::map* fct_map) +{ + if(fct_map == nullptr) + return; + switch(in->type) + { + case _obj::block_function: { + function* t = dynamic_cast(in); + if(!fct_map->insert( std::make_pair(t->name, 1) ).second) + (*fct_map)[t->name]++; + }; break; + default: break; + } +} + +void replace_fctname(_obj* in, std::map* fctmap) +{ + switch(in->type) + { + case _obj::block_function: { + function* t = dynamic_cast(in); + auto el=fctmap->find(t->name); + if(el!=fctmap->end()) + t->name = el->second; + }; break; + case _obj::block_cmd: { + cmd* t = dynamic_cast(in); + std::string cmdname = t->firstarg_string(); + auto el=fctmap->find(cmdname); + if(el!=fctmap->end()) + { + delete t->args->args[0]; + t->args->args[0] = new arg(el->second); + } + }; break; + default: break; + } +} + +void delete_fcts(_obj* in, std::set* fcts) +{ + switch(in->type) + { + case _obj::_list: { + list* t = dynamic_cast(in); + for(uint32_t i=0; icls.size(); i++) + { + block* tb = t->cls[i]->first_block(); + if(tb != nullptr && tb->type == _obj::block_function) + { + function* fc = dynamic_cast(tb); + if(fcts->find(fc->name)!=fcts->end()) + { + delete t->cls[i]; + t->cls.erase(t->cls.begin()+i); + i--; + } + } + } + } + default: break; + } +} + +/** name things **/ + +char nchar(uint32_t n) +{ + if(n<26) + return 'a'+n; + else if(n<52) + return 'A'+(n-26); + else if(n==52) + return '_'; + else if(n<63) + return '0'+(n-53); + else + return 0; +} + +std::string minimal_name(uint32_t n) +{ + if(n<53) + { + std::string ret; + ret += nchar(n); + return ret; + } + else + { + uint32_t k=n%53; + uint32_t q=n/53; + std::string ret; + ret += nchar(k); + ret += nchar(q); + while(q>64) + { + q /= 64; + ret += nchar(q); + } + return ret; + } +} + +std::map gen_map(std::map const& vars, std::set excluded) +{ + std::map ret; + auto ordered = sort_by_value(vars); + uint32_t n=0; + for(std::pair it: ordered) + { + std::string newname; + do { + newname = minimal_name(n); + n++; + } while( excluded.find(newname) != excluded.end() ); + ret.insert(std::make_pair(it.first, newname)); + } + return ret; +} + +void minimize_var(_obj* in, std::regex exclude) +{ + std::map vars; + std::set excluded; + std::map varmap; + // get vars + recurse(get_map_varname, in, &vars); + // remove excluded + excluded = prune_matching(vars, exclude); + // create mapping + varmap=gen_map(vars, excluded); + // perform replace + recurse(replace_varname, in, &varmap); +} + +void minimize_fct(_obj* in, std::regex exclude) +{ + std::map fcts, cmdmap; + std::set allcmds, excluded; + std::map fctmap; + // get fcts + recurse(get_map_fctname, in, &fcts); + // get cmds + recurse(get_map_cmd, in, &cmdmap); + allcmds=map_to_set(cmdmap); + // remove excluded + excluded = prune_matching(fcts, exclude); + // concatenate excluded to commands + concat_sets(allcmds, excluded); + // create mapping + fctmap=gen_map(fcts, allcmds); + // perform replace + recurse(replace_fctname, in, &fctmap); +} + +void delete_unused_fct(_obj* in, std::regex exclude) +{ + std::map fctmap, cmdmap; + std::set unused; + // get fcts + recurse(get_map_fctname, in, &fctmap); + // get cmds + recurse(get_map_cmd, in, &cmdmap); + // remove excluded + prune_matching(fctmap, exclude); + for(auto it: fctmap) + { + if(cmdmap.find(it.first) == cmdmap.end()) + unused.insert(it.first); + } + if(unused.size()>0) + recurse(delete_fcts, in, &unused); +} + +void list_stuff(_obj* in, std::regex exclude, void (&fct)(_obj*,std::map*) ) +{ + std::map map; + recurse(fct, in, &map); + prune_matching(map, exclude); + uint32_t max=0; + for(auto it: map) + if(it.second > max) + max=it.second; + for(auto it: map) + printf("%*d %s\n", (uint32_t)log10(max)+1, it.second, it.first.c_str()); +} + +void list_vars(_obj* in, std::regex exclude) +{ + list_stuff(in, exclude, get_map_varname); +} + +void list_fcts(_obj* in, std::regex exclude) +{ + list_stuff(in, exclude, get_map_fctname); +} + +void list_cmds(_obj* in, std::regex exclude) +{ + list_stuff(in, exclude, get_map_cmd); +} diff --git a/src/options.cpp b/src/options.cpp index a99e685..296675a 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -1,29 +1,63 @@ #include "options.hpp" +#include "minimize.hpp" + ztd::option_set options = gen_options(); -bool opt_minimize; -bool piped=false; +bool opt_minimize=false; + +bool g_cd=false; +bool g_include=true; +bool g_resolve=true; ztd::option_set gen_options() { ztd::option_set ret; - ret.add(ztd::option('h', "help", false, "Display this help message")); - ret.add(ztd::option('m', "minimize", false, "Minimize code")); - ret.add(ztd::option('e', "exec", false, "Directly execute script")); - ret.add(ztd::option('o', "output", true , "Output result script to file", "file")); - ret.add(ztd::option('c', "stdout", false, "Output result script to stdout")); - ret.add(ztd::option("help-commands", false, "Print help for linker commands")); + ret.add( + ztd::option("\r [Help]"), + ztd::option('h', "help", false, "Display this help message"), + ztd::option("help-commands", false, "Print help for linker commands"), + ztd::option("\r [Processing]"), + ztd::option('C', "no-cd", false, "Don't cd when doing %include and %resolve"), + ztd::option('m', "minimize", false, "Minimize code without changing functionality"), + ztd::option("no-include", false, "Don't resolve %include commands"), + ztd::option("no-resolve", false, "Don't resolve %resolve commands"), + ztd::option("\r [Result]"), + ztd::option('o', "output", true , "Output result script to file", "file"), + ztd::option('c', "stdout", false, "Output result script to stdout"), + ztd::option('e', "exec", false, "Directly execute script"), + ztd::option("\r [var/fct processing]"), + ztd::option("minimize-var", false, "Minimize variable names"), + ztd::option("minimize-fct", false, "Minimize function names"), + ztd::option("var-exclude", true, "List of matching regex to ignore for variable processing", "list"), + ztd::option("fct-exclude", true, "List of matching regex to ignore for function processing", "list"), + ztd::option("list-var", false, "List all variables invoked in the script"), + ztd::option("list-fct", false, "List all functions invoked in the script"), + ztd::option("list-cmd", false, "List all functions invoked in the script"), + // ztd::option("unset-var", false, "Add 'unset' to vars"), + ztd::option("remove-unused", false, "Remove unused functions") + ); return ret; } +void get_opts() +{ + g_cd=!options['C'].activated; + g_include=!options["no-include"].activated; + g_resolve=!options["no-resolve"].activated; + if(options["var-exclude"]) + { + std::string t=options["var-exclude"]; + re_var_exclude=var_exclude_regex(t); + } + if(options["fct-exclude"]) + re_fct_exclude=fct_exclude_regex(options["fct-exclude"]); +} + ztd::option_set create_include_opts() { ztd::option_set opts; opts.add( - ztd::option('s', false, "Single quote contents"), - ztd::option('d', false, "Double quote contents"), ztd::option('e', false, "Escape double quotes"), - ztd::option('r', false, "Include raw contents, don't parse"), ztd::option('C', false, "Don't cd to folder the file is in"), ztd::option('f', false, "Force include even if already included. Don't count as included") ); @@ -34,7 +68,6 @@ ztd::option_set create_resolve_opts() { ztd::option_set opts; opts.add( - ztd::option('p', false, "Parse contents as shell code"), ztd::option('C', false, "Don't cd to folder this file is in"), ztd::option('f', false, "Ignore non-zero return values") ); @@ -45,21 +78,21 @@ void print_help(const char* arg0) { printf("%s [options] [arg...]\n", arg0); printf("Link extended shell\n"); - printf("Allows file including and command resolving\n"); + printf("Include files and resolve commands on build time\n"); printf("See --help-commands for help on linker commands\n"); printf("\n"); printf("Options:\n"); - options.print_help(3,20); + options.print_help(4,25); } void print_include_help() { printf("%%include [options] \n"); printf("Include the targeted files, from folder of current file\n"); + printf("Default behaviour is to include and parse contents as shell code\n"); printf(" - Regular shell processing applies to the file arguments\n"); - printf(" - Only includes not already included files\n"); - printf(" - `%%include` in command substitutions replaces the substitution\n"); - printf(" =>`%%include_s` can be used inside a substitution to prevent this\n"); + printf(" - Only includes not already included files. Can be forced with -f\n"); + printf(" - `%%include` inside substitutions replaces the substitution and includes raw contents\n"); printf("\n"); ztd::option_set opts=create_include_opts(); @@ -70,9 +103,9 @@ void print_resolve_help() { printf("%%resolve [options] \n"); printf("Execute shell command and substitute output, from folder of current file\n"); + printf(" - Default behaviour is to parse contents as shell code\n"); printf(" - Fails if return value is not 0. Can be ignored with -f\n"); - printf(" - `%%resolve` in command substitutions replaces the substitution\n"); - printf(" =>`%%resolve_s` can be used inside a substitution to prevent this\n"); + printf(" - `%%include` inside substitutions replaces the substitution and puts raw response\n"); printf("\n"); ztd::option_set opts=create_resolve_opts(); diff --git a/src/parse.cpp b/src/parse.cpp index 99b9459..ceb78a0 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -6,9 +6,12 @@ #include "util.hpp" -std::string g_origin; +#define ORIGIN_NONE "" -const std::vector reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "until", "do", "done", "{", "}" }; +#define PARSE_ERROR(str, i) ztd::format_error(str, "", in, i) + +const std::vector all_reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "do", "done", "{", "}" }; +const std::vector out_reserved_words = { "then", "else", "fi", "esac", "do", "done", "}" }; std::string g_expecting; @@ -22,11 +25,6 @@ std::string expecting(std::string const& word) // basic char utils -inline bool is_in(char c, const char* set) -{ - return strchr(set, c) != NULL; -} - bool has_common_char(const char* str1, const char* str2) { uint32_t i=0; @@ -47,8 +45,9 @@ inline bool is_alpha(char c) return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z'); } -bool is_alphanum(std::string const& str) +bool valid_name(std::string const& str) { + if(!is_alpha(str[0]) && str[0] != '_') return false; for(auto it: str) { if(! (is_alphanum(it) || it=='_' ) ) @@ -57,16 +56,11 @@ bool is_alphanum(std::string const& str) return true; } -bool valid_name(std::string const& str) -{ - return (is_alpha(str[0]) || str[0] == '_') && is_alphanum(str); -} - // string utils -bool word_is_reserved(std::string const in) +bool word_is_reserved_out(std::string const in) { - for(auto it: reserved_words) + for(auto it: out_reserved_words) if(in == it) return true; return false; @@ -88,7 +82,7 @@ bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, co return false; } -std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) +std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) { uint32_t i=start; while(i parse_subshell(const char* in, uint32_t size, uint32_t start); +std::pair parse_varname(const char* in, uint32_t size, uint32_t start) +{ + uint32_t i=start; + std::string ret; + + // special vars + if(is_in(in[i], SPECIAL_VARS) || (in[i]>='0' && in[i]<='1')) + { + ret=in[i]; + i++; + } + else // varname + { + while(i parse_arithmetic(const char* in, uint32_t size, uint32_t start) +std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start) { - subarg_arithmetic* ret = new subarg_arithmetic; + arithmetic_subarg* ret = new arithmetic_subarg; uint32_t i=start; try @@ -147,7 +162,7 @@ std::pair parse_arithmetic(const char* in, uint32_ delete pp.first; if(i >= size || in[i]!=')') { - throw ztd::format_error( "Unexpected token ')', expecting '))'", g_origin, in, i ); + throw PARSE_ERROR( "Unexpected token ')', expecting '))'", i ); } ret->val = std::string(in+start, i-start-1); i++; @@ -167,14 +182,14 @@ std::pair parse_arithmetic(const char* in, uint32_ std::pair parse_arg(const char* in, uint32_t size, uint32_t start) { arg* ret = new arg; - // j : start of subarg + // j : start of subarg , q = start of quote uint32_t i=start,j=start,q=start; try { if(is_in(in[i], SPECIAL_TOKENS)) - throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); + throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); while(i parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$((", in, size, i) ) // arithmetic operation { // add previous subarg - ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=3; // get arithmetic auto r=parse_arithmetic(in, size, i); @@ -212,18 +227,32 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$(", in, size, i) ) // substitution { // add previous subarg - ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=2; // get subshell auto r=parse_subshell(in, size, i); - ret->sa.push_back(new subarg_subshell(r.first)); + ret->sa.push_back(new subshell_subarg(r.first, true)); j = i = r.second; } + else if( in[i] == '$' ) + { + auto r=parse_varname(in, size, i+1); + if(r.second > i+1) + { + // add previous subarg + ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + // add varname + ret->sa.push_back(new variable_subarg(r.first)); + j = i = r.second; + } + else + i++; + } else i++; if(i>=size) - throw ztd::format_error("Unterminated double quote", g_origin, in, q); + throw PARSE_ERROR("Unterminated double quote", q); } i++; } @@ -234,13 +263,13 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star while(i=size) - throw ztd::format_error("Unterminated single quote", g_origin, in, q); + throw PARSE_ERROR("Unterminated single quote", q); i++; } else if( word_eq("$((", in, size, i) ) // arithmetic operation { // add previous subarg - ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=3; // get arithmetic auto r=parse_arithmetic(in, size, i); @@ -250,25 +279,34 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$(", in, size, i) ) // substitution { // add previous subarg - ret->sa.push_back(new subarg_string(std::string(in+j, i-j))); + ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=2; // get subshell auto r=parse_subshell(in, size, i); - ret->sa.push_back(new subarg_subshell(r.first)); + ret->sa.push_back(new subshell_subarg(r.first, false)); j = i = r.second; } - else if( word_eq("$#", in, size, i) ) - i+=2; + else if( in[i] == '$' ) + { + auto r=parse_varname(in, size, i+1); + if(r.second > i+1) + { + // add previous subarg + ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + // add varname + ret->sa.push_back(new variable_subarg(r.first)); + j = i = r.second; + } + else + i++; + } else i++; } // add string subarg std::string val=std::string(in+j, i-j); - ret->sa.push_back(new subarg_string(val)); - - // raw string for other uses - ret->raw = std::string(in+start, i-start); + ret->sa.push_back(new string_subarg(val)); } catch(ztd::format_error& e) @@ -294,7 +332,7 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 if(is_in(in[i], SPECIAL_TOKENS)) { if(hard_error) - throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); + throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); else return std::make_pair(ret, i); } @@ -305,7 +343,7 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 i = skip_chars(in, size, pp.second, SPACES); if(i>=size) return std::make_pair(ret, i); - if(is_in(in[i], SPECIAL_TOKENS) ) + if( is_in(in[i], SPECIAL_TOKENS) ) return std::make_pair(ret, i); } } @@ -342,7 +380,7 @@ std::pair parse_pipeline(const char* in, uint32_t size, uin if( i>=size || is_in(in[i], PIPELINE_END) || word_eq("||", in, size, i) ) return std::make_pair(ret, i); else if( in[i] != '|' ) - throw ztd::format_error( strf("Unexpected token: '%c'", in[i] ), g_origin, in, i); + throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i] ), i); i++; } } @@ -370,14 +408,10 @@ std::pair parse_condlist(const char* in, uint32_t size, uin auto pp=parse_pipeline(in, size, i); ret->add(pp.first, optype); i = pp.second; - if(i>=size || is_in(in[i], CONTROL_END)) // end here exactly: used for control later + if(i>=size || is_in(in[i], CONTROL_END) || is_in(in[i], COMMAND_SEPARATOR)) // end here exactly: used for control later { return std::make_pair(ret, i); } - else if(is_in(in[i], COMMAND_SEPARATOR)) // end one char after: skip them for next parse - { - return std::make_pair(ret, i+1); - } else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after { ret->parallel=true; @@ -395,10 +429,10 @@ std::pair parse_condlist(const char* in, uint32_t size, uin optype=OR_OP; } else - throw ztd::format_error( strf("Unexpected token: '%c'", in[i]), g_origin, in, i); + throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i]), i); i = skip_unread(in, size, i); if(i>=size) - throw ztd::format_error( "Unexpected end of file", g_origin, in, i ); + throw PARSE_ERROR( "Unexpected end of file", i ); } } catch(ztd::format_error& e) @@ -409,21 +443,30 @@ std::pair parse_condlist(const char* in, uint32_t size, uin return std::make_pair(ret, i); } -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c) +std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c, const char* expecting=NULL) { - std::vector ret; + list* ret = new list; uint32_t i=skip_unread(in, size, start); try { - while(end_c == 0 || in[i] != end_c) + while(in[i] != end_c) { auto pp=parse_condlist(in, size, i); - ret.push_back(pp.first); - i = skip_unread(in, size, pp.second); + ret->cls.push_back(pp.first); + if(is_in(in[pp.second], COMMAND_SEPARATOR)) + i = skip_unread(in, size, pp.second+1); + else + i = skip_unread(in, size, pp.second); + if(i>=size) { if(end_c != 0) - throw ztd::format_error(strf("Expecting '%c'", end_c), g_origin, in, start-1); + { + if(expecting!=NULL) + throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); + else + throw PARSE_ERROR(strf("Expecting '%c'", end_c), start-1); + } else break; } @@ -431,16 +474,15 @@ std::pair parse_list_until(const char* in, uint32_t size, uint } catch(ztd::format_error& e) { - for(auto it: ret) - delete it; + delete ret; throw e; } return std::make_pair(ret, i); } -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word) +std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word) { - std::vector ret; + list* ret = new list; uint32_t i=skip_unread(in, size, start); try { @@ -457,29 +499,31 @@ std::pair parse_list_until(const char* in, uint32_t size, uint } // do a parse auto pp=parse_condlist(in, size, i); - ret.push_back(pp.first); - i = skip_unread(in, size, pp.second); + ret->cls.push_back(pp.first); + if(is_in(in[pp.second], COMMAND_SEPARATOR)) + i = skip_unread(in, size, pp.second+1); + else + i = skip_unread(in, size, pp.second); // word wasn't found if(i>=size) { - throw ztd::format_error(strf("Expecting '%s'", end_word.c_str()), g_origin, in, start-1); + throw PARSE_ERROR(strf("Expecting '%s'", end_word.c_str()), start-1); } } g_expecting=old_expect; } catch(ztd::format_error& e) { - for(auto it: ret) - delete it; + delete ret; throw e; } return std::make_pair(ret, i); } -std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words) +std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words, const char* expecting=NULL) { - std::vector ret; + list* ret = new list; uint32_t i=skip_unread(in, size, start);; std::string found_end_word; try @@ -493,6 +537,13 @@ std::tuple parse_list_until(const char* in, uint3 auto wp=get_word(in, size, i, ARG_END); for(auto it: end_words) { + if(it == ";" && in[i] == ';') + { + found_end_word=";"; + i++; + stop=true; + break; + } if(wp.first == it) { found_end_word=it; @@ -505,20 +556,25 @@ std::tuple parse_list_until(const char* in, uint3 break; // do a parse auto pp=parse_condlist(in, size, i); - ret.push_back(pp.first); - i = skip_unread(in, size, pp.second); + ret->cls.push_back(pp.first); + if(is_in(in[pp.second], COMMAND_SEPARATOR)) + i = skip_unread(in, size, pp.second+1); + else + i = skip_unread(in, size, pp.second); // word wasn't found if(i>=size) { - throw ztd::format_error(strf("Expecting '%s'", end_words[0].c_str()), g_origin, in, start-1); + if(expecting!=NULL) + throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); + else + throw PARSE_ERROR(strf("Expecting '%s'", end_words[0].c_str()), start-1); } } g_expecting=old_expect; } catch(ztd::format_error& e) { - for(auto it: ret) - delete it; + delete ret; throw e; } return std::make_tuple(ret, i, found_end_word); @@ -535,10 +591,10 @@ std::pair parse_subshell(const char* in, uint32_t size, uin try { auto pp=parse_list_until(in, size, start, ')'); - ret->cls=pp.first; + ret->lst=pp.first; i=pp.second; - if(ret->cls.size()<=0) - throw ztd::format_error("Subshell is empty", g_origin, in, start-1); + if(ret->lst->size()<=0) + throw PARSE_ERROR("Subshell is empty", start-1); i++; } catch(ztd::format_error& e) @@ -562,10 +618,10 @@ std::pair parse_brace(const char* in, uint32_t size, uint32_t try { auto pp=parse_list_until(in, size, start, '}'); - ret->cls=pp.first; + ret->lst=pp.first; i=pp.second; - if(ret->cls.size()<=0) - throw ztd::format_error("Brace block is empty", g_origin, in, start-1); + if(ret->lst->size()<=0) + throw PARSE_ERROR("Brace block is empty", start-1); i++; } catch(ztd::format_error& e) @@ -580,7 +636,7 @@ std::pair parse_brace(const char* in, uint32_t size, uint32_t // parse a function // must start right after the () // then parses a brace block -std::pair parse_function(const char* in, uint32_t size, uint32_t start) +std::pair parse_function(const char* in, uint32_t size, uint32_t start, const char* after="()") { uint32_t i=start; function* ret = new function; @@ -589,14 +645,14 @@ std::pair parse_function(const char* in, uint32_t size, uin { i=skip_unread(in, size, i); if(in[i] != '{') - throw ztd::format_error("Expecting { after ()", g_origin, in, i); + throw PARSE_ERROR( strf("Expecting { after %s", after) , i); i++; auto pp=parse_list_until(in, size, i, '}'); - if(pp.first.size()<=0) - throw ztd::format_error("Condition is empty", g_origin, in, i); + if(pp.first->size()<=0) + throw PARSE_ERROR("Condition is empty", i); - ret->cls=pp.first; + ret->lst=pp.first; i=pp.second; i++; } @@ -609,6 +665,7 @@ std::pair parse_function(const char* in, uint32_t size, uin return std::make_pair(ret, i); } +// must start at read char std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) { cmd* ret = new cmd; @@ -616,9 +673,27 @@ std::pair parse_cmd(const char* in, uint32_t size, uint32_t star try { - auto pp=parse_arglist(in, size, start, true); - ret->args = pp.first; - i = pp.second; + while(true) // parse var assigns + { + auto wp=get_word(in, size, i, VARNAME_END); + if(wp.second parse_case(const char* in, uint32_t size, uint3 while(icases.push_back( std::pair() ); + ret->cases.push_back( std::make_pair(std::vector(), nullptr) ); // iterator to last element auto cc = ret->cases.end()-1; @@ -666,55 +741,40 @@ std::pair parse_case(const char* in, uint32_t size, uint3 { pa = parse_arg(in, size, i); cc->first.push_back(pa.first); - if(pa.first->raw == "") - throw ztd::format_error("Empty case value", g_origin, in, i); + if(pa.first->sa.size() <= 0) + throw PARSE_ERROR("Empty case value", i); i=skip_unread(in, size, pa.second); if(i>=size) - throw ztd::format_error("Unexpected end of file. Expecting 'esac'", g_origin, in, i); + throw PARSE_ERROR("Unexpected end of file. Expecting 'esac'", i); if(in[i] == ')') break; if(in[i] != '|' && is_in(in[i], SPECIAL_TOKENS)) - throw ztd::format_error( strf("Unexpected token '%c', expecting ')'", in[i]), g_origin, in, i ); + throw PARSE_ERROR( strf("Unexpected token '%c', expecting ')'", in[i]), i ); i=skip_unread(in, size, i+1); } i++; - while(true) // blocks + // until ;; + auto tp = parse_list_until(in, size, i, {";", "esac"}, ";;"); + cc->second = std::get<0>(tp); + i = std::get<1>(tp); + std::string word = std::get<2>(tp); + if(word == "esac") { - auto pc = parse_condlist(in, size, i); - cc->second.push_back(pc.first); - i=pc.second; - - if(i+1>=size) - throw ztd::format_error("Expecting ';;'", g_origin, in, i); - if(in[i] == ')') - throw ztd::format_error( strf("Unexpected token '%c', expecting ';;'", in[i]), g_origin, in, i ); - - // end of case: on same line - if(in[i-1] == ';' && in[i] == ';') - { - i++; - break; - } - - // end of case: on new line - i=skip_unread(in, size, i); - if(word_eq(";;", in, size, i)) - { - i+=2; - break; - } - // end of block: ignore missing ;; - if(word_eq("esac", in, size, i)) - break; - + i -= 4; + break; } - i=skip_unread(in, size, i); + if(i >= size) + throw PARSE_ERROR("Expecting ';;'", i); + if(in[i-1] != ';') + throw PARSE_ERROR("Unexpected token: ';'", i); + + i=skip_unread(in, size, i+1); } // ended before finding esac if(i>=size) - throw ztd::format_error("Expecting 'esac'", g_origin, in, i); + throw PARSE_ERROR("Expecting 'esac'", i); i+=4; } catch(ztd::format_error& e) @@ -735,43 +795,32 @@ std::pair parse_if(const char* in, uint32_t size, uint32_t { while(true) { - std::pair ll; std::string word; - try - { - auto pp=parse_list_until(in, size, i, "then"); - if(pp.first.size()<=0) - throw ztd::format_error("Condition is empty", g_origin, in, i); - i=pp.second; - ll.first=pp.first; + ret->blocks.push_back(std::make_pair(nullptr, nullptr)); + auto ll = ret->blocks.end()-1; - auto tp=parse_list_until(in, size, i, {"fi", "elif", "else"}); - if(std::get<0>(tp).size() <= 0) - throw ztd::format_error("if block is empty", g_origin, in, i); - ll.second = std::get<0>(tp); - i=std::get<1>(tp); - word=std::get<2>(tp); + auto pp=parse_list_until(in, size, i, "then"); + ll->first = pp.first; + i = pp.second; + if(ll->first->size()<=0) + throw PARSE_ERROR("Condition is empty", i); - ret->blocks.push_back(ll); - } - catch(ztd::format_error& e) - { - for(auto it: ll.first) - delete it; - for(auto it: ll.second) - delete it; - throw e; - } + auto tp=parse_list_until(in, size, i, {"fi", "elif", "else"}); + ll->second = std::get<0>(tp); + i = std::get<1>(tp); + word = std::get<2>(tp); + if(std::get<0>(tp)->size() <= 0) + throw PARSE_ERROR("if block is empty", i); if(word == "fi") break; if(word == "else") { auto pp=parse_list_until(in, size, i, "fi"); - if(pp.first.size()<=0) - throw ztd::format_error("else block is empty", g_origin, in, i); - ret->else_cls=pp.first; + if(pp.first->size()<=0) + throw PARSE_ERROR("else block is empty", i); + ret->else_lst=pp.first; i=pp.second; break; } @@ -798,7 +847,7 @@ std::pair parse_for(const char* in, uint32_t size, uint32_ auto wp = get_word(in, size, i, ARG_END); if(!valid_name(wp.first)) - throw ztd::format_error( strf("Bad identifier in for clause: '%s'", wp.first.c_str()), g_origin, in, i ); + throw PARSE_ERROR( strf("Bad identifier in for clause: '%s'", wp.first.c_str()), i ); ret->varname = wp.first; i=skip_chars(in, size, wp.second, SPACES); @@ -812,11 +861,11 @@ std::pair parse_for(const char* in, uint32_t size, uint32_ i = pp.second; } else if(wp.first != "") - throw ztd::format_error( "Expecting 'in' after for", g_origin, in, i ); + throw PARSE_ERROR( "Expecting 'in' after for", i ); // end of arg list if(!is_in(in[i], "\n;#")) - throw ztd::format_error( strf("Unexpected token '%c', expecting '\\n' or ';'", in[i]), g_origin, in, i ); + throw PARSE_ERROR( strf("Unexpected token '%c', expecting '\\n' or ';'", in[i]), i ); if(in[i] == ';') i++; i=skip_unread(in, size, i); @@ -824,7 +873,7 @@ std::pair parse_for(const char* in, uint32_t size, uint32_ // do wp = get_word(in, size, i, ARG_END); if(wp.first != "do") - throw ztd::format_error( "Expecting 'do', after for", g_origin, in, i); + throw PARSE_ERROR( "Expecting 'do', after for", i); i=skip_unread(in, size, wp.second); // ops @@ -850,18 +899,18 @@ std::pair parse_while(const char* in, uint32_t size, uin { // cond auto pp=parse_list_until(in, size, i, "do"); - if(pp.first.size() <= 0) - throw ztd::format_error("condition is empty", g_origin, in, i); - ret->cond = pp.first; - i=pp.second; + i = pp.second; + if(ret->cond->size() <= 0) + throw PARSE_ERROR("condition is empty", i); + // ops auto lp = parse_list_until(in, size, i, "done"); - if(lp.first.size() <= 0) - throw ztd::format_error("while is empty", g_origin, in, i); ret->ops=lp.first; - i=lp.second; + i = lp.second; + if(ret->ops->size() <= 0) + throw PARSE_ERROR("while is empty", i); } catch(ztd::format_error& e) { @@ -872,44 +921,6 @@ std::pair parse_while(const char* in, uint32_t size, uin return std::make_pair(ret, i); } -std::pair parse_fct_or_cmd(const char* in, uint32_t size, uint32_t start) -{ - block* ret = nullptr; - uint32_t i=start; - - try - { - // get first word - auto tp=get_word(in, size, start, ARG_END); - - i=skip_unread(in, size, tp.second); - if(word_eq("()", in, size, i)) // is a function - { - if(!valid_name(tp.first)) - throw ztd::format_error( strf("Bad function name: '%s'", tp.first.c_str()), g_origin, in, start ); - - auto pp = parse_function(in, size, i+2); - // first arg is function name - pp.first->name = tp.first; - ret = pp.first; - i = pp.second; - } - else // is a command - { - auto pp = parse_cmd(in, size, start); - ret = pp.first; - i = pp.second; - } - } - catch(ztd::format_error& e) - { - if(ret!=nullptr) delete ret; - throw e; - } - - return std::make_pair(ret, i); -} - // detect if brace, subshell, case or other std::pair parse_block(const char* in, uint32_t size, uint32_t start) { @@ -919,7 +930,7 @@ std::pair parse_block(const char* in, uint32_t size, uint32_t try { if(i>=size) - throw ztd::format_error("Unexpected end of file", g_origin, in, i); + throw PARSE_ERROR("Unexpected end of file", i); if( in[i] == '(' ) //subshell { auto pp = parse_subshell(in, size, i+1); @@ -928,8 +939,9 @@ std::pair parse_block(const char* in, uint32_t size, uint32_t } else { - auto wp=get_word(in, size, i, SEPARATORS); + auto wp=get_word(in, size, i, BLOCK_TOKEN_END); std::string word=wp.first; + // reserved words if( word == "{" ) // brace block { auto pp = parse_brace(in, size, wp.second); @@ -960,20 +972,48 @@ std::pair parse_block(const char* in, uint32_t size, uint32_t ret = pp.first; i = pp.second; } - else if( word == "until") + else if( word == "until" ) { auto pp=parse_while(in, size, wp.second); pp.first->real_condition()->negate(); ret = pp.first; i = pp.second; } - else if(word_is_reserved(word)) + else if(word_is_reserved_out(word)) { - throw ztd::format_error( "Unexpected '"+word+"'" + expecting(g_expecting) , g_origin, in, i); + throw PARSE_ERROR( "Unexpected '"+word+"'" + expecting(g_expecting) , i); } - else // other: command/function + // end reserved words + else if( word == "function" ) // bash style function { - auto pp = parse_fct_or_cmd(in, size, i); + auto wp2=get_word(in, size, skip_unread(in, size, wp.second), VARNAME_END); + if(!valid_name(wp2.first)) + throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); + + i=skip_unread(in, size, wp2.second); + if(word_eq("()", in, size, i)) + i=skip_unread(in, size, i+2); + + auto pp = parse_function(in, size, i, "function definition"); + // function name + pp.first->name = wp2.first; + ret = pp.first; + i = pp.second; + } + else if(word_eq("()", in, size, skip_unread(in, size, wp.second))) // is a function + { + if(!valid_name(word)) + throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); + + auto pp = parse_function(in, size, skip_unread(in, size, wp.second)+2); + // first arg is function name + pp.first->name = word; + ret = pp.first; + i = pp.second; + } + else // is a command + { + auto pp = parse_cmd(in, size, i); ret = pp.first; i = pp.second; } @@ -1004,12 +1044,13 @@ std::pair parse_block(const char* in, uint32_t size, uint32_t } // parse main -shmain* parse(const char* in, uint32_t size) +shmain* parse_text(const char* in, uint32_t size, std::string const& filename) { shmain* ret = new shmain(); uint32_t i=0; try { + ret->filename=filename; // get shebang if(word_eq("#!", in, size, 0)) { @@ -1019,13 +1060,13 @@ shmain* parse(const char* in, uint32_t size) i = skip_unread(in, size, i); // parse all commands auto pp=parse_list_until(in, size, i, 0); - ret->cls=pp.first; + ret->lst=pp.first; i=pp.second; } catch(ztd::format_error& e) { delete ret; - throw e; + throw ztd::format_error(e.what(), filename, e.data(), e.where()); } return ret; } diff --git a/src/resolve.cpp b/src/resolve.cpp new file mode 100644 index 0000000..6e19a75 --- /dev/null +++ b/src/resolve.cpp @@ -0,0 +1,379 @@ +#include "resolve.hpp" + +#include +#include + +#include "recursive.hpp" +#include "options.hpp" +#include "util.hpp" +#include "parse.hpp" + +// -- CD STUFF -- + +std::string pwd() +{ + char buf[2048]; + if(getcwd(buf, 2048) != NULL) + { + std::string ret=ztd::exec("pwd").first; // getcwd failed: call pwd + ret.pop_back(); + return ret; + } + return std::string(buf); +} + +// returns path to old dir +std::string _pre_cd(shmain* parent) +{ + if(parent->is_dev_file() || parent->filename == "") + return ""; + std::string dir=pwd(); + std::string cddir=ztd::exec("dirname", parent->filename).first; + cddir.pop_back(); + if(chdir(cddir.c_str()) != 0) + throw std::runtime_error("Cannot cd to '"+cddir+"'"); + return dir; +} + +void _cd(std::string const& dir) +{ + if(dir!="" && chdir(dir.c_str()) != 0) + throw std::runtime_error("Cannot cd to '"+dir+"'"); +} + +// -- COMMANDS -- + +// return [] +std::vector> do_include_raw(condlist* cmd, shmain* parent, std::string* ex_dir=nullptr) +{ + std::vector> ret; + if(!g_include) + return ret; + + ztd::option_set opts = create_include_opts(); + std::vector rargs; + try + { + rargs = opts.process(cmd->first_cmd()->args->strargs(1), false, true, false); + } + catch(ztd::option_error& e) + { + throw std::runtime_error(std::string("%include: ")+e.what()); + } + + std::string dir; + if(g_cd && !opts['C']) + { + dir=_pre_cd(parent); + if(ex_dir!=nullptr) + *ex_dir=dir; + } + + std::string command="for I in "; + for(auto it: rargs) + command += it + ' '; + command += "; do echo $I ; done"; + std::string inc=ztd::sh(command); + + auto v = split(inc, '\n'); + + for(auto it: v) + { + if(opts['f'] || add_include(it)) + ret.push_back(std::make_pair(it, import_file(it))); + } + + if(ex_dir==nullptr) + _cd(dir); + + return ret; +} + +std::vector do_include_parse(condlist* cmd, shmain* parent) +{ + std::vector ret; + if(!g_include) + return ret; + + std::string dir; + auto incs=do_include_raw(cmd, parent, &dir); + + + for(auto it: incs) + { + shmain* sh=parse_text(it.second, it.first); + resolve(sh); + // get the cls + ret.insert(ret.end(), sh->lst->cls.begin(), sh->lst->cls.end()); + // safety and cleanup + sh->lst->cls.resize(0); + delete sh; + } + // cd back + _cd(dir); + + return ret; +} + +// +std::pair do_resolve_raw(condlist* cmd, shmain* parent, std::string* ex_dir=nullptr) +{ + std::pair ret; + if(!g_resolve) + return ret; + + ztd::option_set opts = create_resolve_opts(); + std::vector rargs; + try + { + rargs = opts.process(cmd->first_cmd()->args->strargs(1), false, true, false); + } + catch(ztd::option_error& e) + { + throw std::runtime_error(std::string("%resolve: ")+e.what()); + } + + std::string dir; + if(g_cd && !opts['C']) + { + dir=_pre_cd(parent); + if(ex_dir!=nullptr) + *ex_dir=dir; + } + + cmd->prune_first_cmd(); + + std::string fullcmd=concatargs(rargs); + std::string othercmd=cmd->generate(0); + if(othercmd != "") + fullcmd += '|' + othercmd; + + auto p=ztd::shp(fullcmd); + + if(!opts['f'] && p.second!=0) + { + throw std::runtime_error( strf("command `%s` returned %u", fullcmd.c_str(), p.second) ); + } + + if(ex_dir==nullptr) + _cd(dir); + + while(p.first[p.first.size()-1] == '\n') + p.first.pop_back(); + + ret = std::make_pair(fullcmd, p.first); + return ret; +} + +// if first is nullptr: is a string +std::vector do_resolve_parse(condlist* cmd, shmain* parent) +{ + std::vector ret; + if(!g_resolve) + return ret; + + std::pair p; + try + { + // get + std::string dir; + p=do_resolve_raw(cmd, parent, &dir); + // do parse + shmain* sh = parse_text(p.second); + resolve(sh); + // get the cls + ret = sh->lst->cls; + // safety and cleanup + sh->lst->cls.resize(0); + delete sh; + // cd back + _cd(dir); + } + catch(ztd::format_error& e) + { + throw ztd::format_error(e.what(), '`'+p.first+'`', e.data(), e.where()); + } + + return ret; +} + +// -- OBJECT CALLS -- + +std::vector resolve_condlist(condlist* in, shmain* parent) +{ + cmd* tc = in->first_cmd(); + if(tc == nullptr) + return std::vector(); + + std::string const& strcmd=tc->firstarg_string(); + + if(strcmd == "%include") + return do_include_parse(in, parent); + else if(strcmd == "%resolve") + return do_resolve_parse(in, parent); + else + return std::vector(); +} + +std::vector resolve_arg(arg* in, shmain* parent, bool forcequote=false) +{ + std::vector ret; + arg* ta=nullptr; + uint32_t j=0; + for(uint32_t i=0 ; isa.size() ; i++) + { + if(in->sa[i]->type != _obj::subarg_subshell) // skip if not subshell + continue; + + subshell_subarg* tsh = dynamic_cast(in->sa[i]); + if(tsh->sbsh->lst->cls.size() > 1) // skip if more than one cl + continue; + condlist* tc = tsh->sbsh->lst->cls[0]; + cmd* c = tc->first_cmd(); + if(c == nullptr) // skip if not cmd + continue; + std::string strcmd=c->firstarg_string(); + std::string fulltext; + if(strcmd == "%include") + { + for(auto it: do_include_raw(tc, parent) ) + fulltext += it.second; + } + else if(strcmd == "%resolve") + { + fulltext = do_resolve_raw(tc, parent).second; + } + else // skip + continue; + + if(tsh->quoted || forcequote) + { + stringReplace(fulltext, "\"", "\\\""); + stringReplace(fulltext, "!", "\\!"); + } + if(!tsh->quoted && forcequote) + fulltext = '"' + fulltext + '"'; + + + if(tsh->quoted || forcequote) + { + // replace with new subarg + delete in->sa[i]; + in->sa[i] = new string_subarg(fulltext); + } + else + { + auto strargs=split(fulltext, SEPARATORS); + if(strargs.size() <= 1) + { + std::string val; + if(strargs.size() == 1) + val = strargs[0]; + delete in->sa[i]; + in->sa[i] = new string_subarg(val); + } + else // pack + { + if(ta == nullptr) + ta = new arg; + ta->sa.insert(ta->sa.end(), in->sa.begin()+j, in->sa.begin()+i); + ta->sa.push_back(new string_subarg(strargs[i])); + j=i+1; + delete in->sa[i]; + for(uint32_t li=1 ; lisa.push_back(new string_subarg(strargs[li])); + } + + } // end pack + + } // end non quoted + + } // end for + if(ta != nullptr) + { + ta->sa.insert(ta->sa.end(), in->sa.begin()+j, in->sa.end()); + if(ta->sa.size() > 0) + ret.push_back(ta); + else + delete ta; + in->sa.resize(0); + } + return ret; +} + + +// -- RECURSIVE CALL -- + +void resolve_recurse(_obj* o, shmain* parent) +{ + switch(o->type) + { + case _obj::_list : + { + auto t = dynamic_cast(o); + for(uint32_t i=0 ; icls.size() ; i++) + { + std::vector r=resolve_condlist(t->cls[i], parent); + if(r.size()>0) + { + // add new cls after current + t->cls.insert(t->cls.begin()+i+1, r.begin(), r.end()); + // delete current + delete t->cls[i]; + t->cls.erase(t->cls.begin()+i); + // back to previous object + i--; + } + } + // list + } break; + case _obj::_arglist : + { + auto t = dynamic_cast(o); + for(uint32_t i=0 ; iargs.size() ; i++) + { + auto r=resolve_arg(t->args[i], parent); + if(r.size()>0) + { + // add new args + t->args.insert(t->args.begin()+i+1, r.begin(), r.end()); + // delete current + delete t->args[i]; + t->args.erase(t->args.begin()+i); + i += r.size()-1; + } + } + // arglist + return; + } break; + case _obj::block_cmd : + { + auto t = dynamic_cast(o); + for(auto it: t->var_assigns) + resolve_arg(it.second, parent, true); // force quoted + }; break; + case _obj::block_case : + { + auto t = dynamic_cast(o); + for(auto sc: t->cases) + { + resolve_arg(t->carg, parent, true); // force quoted + { + for(auto it: sc.first) + resolve_arg(it, parent, true); // force quoted + } + } + }; break; + default: break; + } + return; +} + +// recursive call of resolve +void resolve(shmain* sh) +{ + recurse(resolve_recurse, sh, sh); +} diff --git a/src/struc.cpp b/src/struc.cpp index 1f843ec..44d6ee0 100644 --- a/src/struc.cpp +++ b/src/struc.cpp @@ -1,9 +1,12 @@ #include "struc.hpp" #include "util.hpp" +#include "options.hpp" #include +std::string g_origin=""; + const std::string cmd::empty_string=""; cmd* make_cmd(std::vector args) @@ -20,8 +23,13 @@ cmd* make_cmd(std::vector args) std::vector arglist::strargs(uint32_t start) { std::vector ret; + bool t=opt_minimize; + opt_minimize=true; for(uint32_t i=start; iraw); + { + ret.push_back(args[i]->generate(0)); + } + opt_minimize=t; return ret; } @@ -30,15 +38,24 @@ void arg::setstring(std::string const& str) for(auto it: sa) delete it; sa.resize(0); - sa.push_back(new subarg_string(str)); - raw = str; + sa.push_back(new string_subarg(str)); } std::string arg::string() { - if(sa.size() > 1 || sa[0]->type != subarg::s_string) + if(sa.size() != 1 || sa[0]->type != subarg::subarg_string) return ""; - return sa[0]->generate(0); + return dynamic_cast(sa[0])->val; +} + + +void condlist::prune_first_cmd() +{ + if(pls.size()>0 && pls[0]->cmds.size()>0) + { + delete pls[0]->cmds[0]; + pls[0]->cmds.erase(pls[0]->cmds.begin()); + } } void condlist::add(pipeline* pl, bool or_op) @@ -48,13 +65,29 @@ void condlist::add(pipeline* pl, bool or_op) this->pls.push_back(pl); } +block* condlist::first_block() +{ + if(pls.size() > 0 && pls[0]->cmds.size() > 0) + return (pls[0]->cmds[0]); + else + return nullptr; +} + +cmd* condlist::first_cmd() +{ + if(pls.size() > 0 && pls[0]->cmds.size() > 0 && pls[0]->cmds[0]->type == _obj::block_cmd) + return dynamic_cast(pls[0]->cmds[0]); + else + return nullptr; +} + cmd* block::single_cmd() { - if(this->type == block::block_subshell) + if(this->type == _obj::block_subshell) { return dynamic_cast(this)->single_cmd(); } - if(this->type == block::block_brace) + if(this->type == _obj::block_brace) { return dynamic_cast(this)->single_cmd(); } @@ -63,23 +96,47 @@ cmd* block::single_cmd() cmd* subshell::single_cmd() { - if( cls.size() == 1 && // only one condlist - cls[0]->pls.size() == 1 && // only one pipeline - cls[0]->pls[0]->cmds.size() == 1 && // only one block - cls[0]->pls[0]->cmds[0]->type == block::block_cmd) // block is a command - return dynamic_cast(cls[0]->pls[0]->cmds[0]); // return command + if( lst->size() == 1 && // only one condlist + (*lst)[0]->pls.size() == 1 && // only one pipeline + (*lst)[0]->pls[0]->cmds.size() == 1 && // only one block + (*lst)[0]->pls[0]->cmds[0]->type == _obj::block_cmd) // block is a command + return dynamic_cast((*lst)[0]->pls[0]->cmds[0]); // return command return nullptr; } cmd* brace::single_cmd() { - if( cls.size() == 1 && // only one condlist - cls[0]->pls.size() == 1 && // only one pipeline - cls[0]->pls[0]->cmds.size() == 1 && // only one block - cls[0]->pls[0]->cmds[0]->type == block::block_cmd) // block is a command - return dynamic_cast(cls[0]->pls[0]->cmds[0]); // return command + if( lst->size() == 1 && // only one condlist + (*lst)[0]->pls.size() == 1 && // only one pipeline + (*lst)[0]->pls[0]->cmds.size() == 1 && // only one block + (*lst)[0]->pls[0]->cmds[0]->type == _obj::block_cmd) // block is a command + return dynamic_cast((*lst)[0]->pls[0]->cmds[0]); // return command return nullptr; } +cmd* condlist::get_cmd(std::string const& cmdname) +{ + for(auto pl: pls) + { + for(auto bl: pl->cmds) + { + if(bl->type == _obj::block_cmd) + { + cmd* c=dynamic_cast(bl); + if(c->args->size()>0 && (*c->args)[0]->equals(cmdname) ) + return c; + } + } + } + return nullptr; +} + +void shmain::concat(shmain* in) +{ + this->lst->cls.insert(this->lst->cls.end(), in->lst->cls.begin(), in->lst->cls.end()); + in->lst->cls.resize(0); + if(this->shebang == "") + this->shebang = in->shebang; +} void condlist::negate() { @@ -91,9 +148,9 @@ void condlist::negate() or_ops[i] = !or_ops[i]; } -std::string const& cmd::firstarg_raw() +std::string const& cmd::firstarg_string() { - if(args!=nullptr && args->size()>0) - return args->args[0]->raw; + if(args!=nullptr && args->args.size()>0 && args->args[0]->sa.size() == 1 && args->args[0]->sa[0]->type == _obj::subarg_string) + return dynamic_cast(args->args[0]->sa[0])->val; return cmd::empty_string; } diff --git a/src/util.cpp b/src/util.cpp index bebe657..771d0dc 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -20,14 +20,38 @@ std::string indent(int n) return ret; } -std::vector split(std::string const& in, char c) +std::vector split(std::string const& in, const char* splitters) { uint32_t i=0,j=0; std::vector ret; + // skip first splitters + while(i split(std::string const& in, char c) +{ + size_t i=0,j=0; + std::vector ret; while(j const& args) { - char buf[2048]; - if(getcwd(buf, 2048) != NULL) + std::string ret; + for(auto it: args) + ret += it + ' '; + ret.pop_back(); + return ret; +} + +void concat_sets(std::set& a, std::set const& b) +{ + for(auto it: b) { - std::string ret=ztd::exec("pwd").first; // getcwd failed: call pwd - ret.pop_back(); - return ret; + a.insert( it ); } - return std::string(buf); +} + +std::set prune_matching(std::set& in, std::regex re) +{ + std::set ret; + auto it=in.begin(); + auto prev=in.end(); + while(it!=in.end()) + { + if( std::regex_match(*it, re) ) + { + ret.insert(*it); + in.erase(it); + if(prev == in.end()) + it = in.begin(); + else + { + it = prev; + it++; + } + } + else + { + prev=it; + it++; + } + } + return ret; } int _exec(std::string const& bin, std::vector const& args) @@ -166,3 +223,46 @@ void printErrorIndex(const char* in, const int index, const std::string& message } } } + + +int execute(shmain* sh, std::vector& args) +{ + std::string data=sh->generate(); + + std::string filename=ztd::exec("basename", args[0]).first; + filename.pop_back(); + + // generate path + std::string tmpdir = (getenv("TMPDIR") != NULL) ? getenv("TMPDIR") : "/tmp" ; + std::string dirpath = tmpdir + "/lxsh_" + ztd::sh("tr -dc '[:alnum:]' < /dev/urandom | head -c10"); + std::string filepath = dirpath+'/'+filename; + + // create dir + if(ztd::exec("mkdir", "-p", dirpath).second) + { + throw std::runtime_error("Failed to create directory '"+dirpath+'\''); + } + + // create stream + std::ofstream stream(filepath); + if(!stream) + { + ztd::exec("rm", "-rf", dirpath); + throw std::runtime_error("Failed to write to '"+filepath+'\''); + } + + // output + stream << data; + stream.close(); + if(ztd::exec("chmod", "+x", filepath).second != 0) + { + ztd::exec("rm", "-rf", dirpath); + throw std::runtime_error("Failed to make '"+filepath+"' executable"); + } + + // exec + int retval=_exec(filepath, args); + ztd::exec("rm", "-rf", dirpath); + + return retval; +}