From a0155991e3b5131485a4637d12a95760dad86cd1 Mon Sep 17 00:00:00 2001 From: zawz Date: Fri, 13 Nov 2020 15:18:30 +0100 Subject: [PATCH] Performance optimizations - Replaced some exec() calls with functions - Removed resolved code being resolved again - Removed empty string subargs inserted upon parsing --- include/recursive.hpp | 5 +- include/resolve.hpp | 5 ++ include/struc.hpp | 2 - include/util.hpp | 3 + src/generate.cpp | 14 ----- src/main.cpp | 9 +-- src/minimize.cpp | 27 ++++----- src/parse.cpp | 35 +++++++++--- src/resolve.cpp | 125 +++++++++++++++++++++++++++--------------- src/util.cpp | 21 ++++++- 10 files changed, 157 insertions(+), 89 deletions(-) diff --git a/include/recursive.hpp b/include/recursive.hpp index db2cc53..1c04f43 100644 --- a/include/recursive.hpp +++ b/include/recursive.hpp @@ -7,13 +7,14 @@ // boolean value of fct: if true, recurse on this object, if false, skip this object template -void recurse(void (&fct)(_obj*, Args...), _obj* o, Args... args) +void recurse(bool (&fct)(_obj*, Args...), _obj* o, Args... args) { if(o == nullptr) return; // execution - fct(o, args...); + if(!fct(o, args...)) + return; // skip recurse if false // recursive calls switch(o->type) diff --git a/include/resolve.hpp b/include/resolve.hpp index eaa16ee..7ce5d26 100644 --- a/include/resolve.hpp +++ b/include/resolve.hpp @@ -3,6 +3,11 @@ #include "struc.hpp" +extern std::vector included; + +bool add_include(std::string const& file); + +void resolve(_obj* sh, shmain* parent); void resolve(shmain* sh); #endif //RESOLVE_HPP diff --git a/include/struc.hpp b/include/struc.hpp index cacb0f1..0d9ed68 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -76,8 +76,6 @@ extern std::string g_origin; cmd* make_cmd(std::vector args); -bool add_include(std::string const& file); - class _obj { public: diff --git a/include/util.hpp b/include/util.hpp index ceee9fd..59cc11e 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -18,6 +18,9 @@ extern std::string indenting_string; +std::string basename(std::string const& in); +std::string dirname(std::string const& in); + std::string indent(int n); std::vector split(std::string const& in, const char* splitters); diff --git a/src/generate.cpp b/src/generate.cpp index 6670bbf..8e569a4 100644 --- a/src/generate.cpp +++ b/src/generate.cpp @@ -6,8 +6,6 @@ #include "options.hpp" #include "parse.hpp" -std::vector included; - bool is_sub_special_cmd(std::string in) { return in == "%include_sub" || in == "%resolve_sub"; @@ -111,18 +109,6 @@ std::string list::generate(int ind, bool first_indent) return ret; } -bool add_include(std::string const& file) -{ - std::string truepath=ztd::exec("readlink", "-f", file).first; - for(auto it: included) - { - if(it == truepath) - return false; - } - included.push_back(truepath); - return true; -} - // BLOCK std::string block::generate_redirs(int ind) diff --git a/src/main.cpp b/src/main.cpp index bcf0752..09ce05e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -86,7 +86,7 @@ int main(int argc, char* argv[]) // do parsing for(uint32_t i=0 ; ishebang).first; + bool shebang_is_bin = basename(argv[0]) == basename(tsh->shebang); if(shebang_is_bin) tsh->shebang="#!/bin/sh"; @@ -112,7 +112,6 @@ int main(int argc, char* argv[]) if(!is_exec && args.size() > 1) // not exec: parse options on args { - std::string t=args[0]; args=options.process(args); } @@ -123,7 +122,9 @@ int main(int argc, char* argv[]) /* mid processing */ // resolve/include if(g_include || g_resolve) + { resolve(tsh); + } // concatenate to main sh->concat(tsh); @@ -180,7 +181,7 @@ int main(int argc, char* argv[]) printFormatError(e); return 100; } - catch(std::exception& e) + catch(std::runtime_error& e) { if(tsh != nullptr) delete tsh; diff --git a/src/minimize.cpp b/src/minimize.cpp index 83ee296..51ffbd5 100644 --- a/src/minimize.cpp +++ b/src/minimize.cpp @@ -87,10 +87,10 @@ std::string get_varname(subarg* in) /** VAR RECURSE **/ -void get_map_varname(_obj* in, std::map* variable_map) +bool get_map_varname(_obj* in, std::map* variable_map) { if(variable_map == nullptr) - return; + return false; switch(in->type) { case _obj::subarg_variable: { @@ -122,9 +122,10 @@ void get_map_varname(_obj* in, std::map* variable_map) }; break; default: break; } + return true; } -void replace_varname(_obj* in, std::map* varmap) +bool replace_varname(_obj* in, std::map* varmap) { switch(in->type) { @@ -170,15 +171,13 @@ void replace_varname(_obj* in, std::map* varmap) }; break; default: break; } + return true; } /** FCT RECURSE **/ -void get_map_cmd(_obj* in, std::map* all_cmds) +bool get_map_cmd(_obj* in, std::map* all_cmds) { - if(all_cmds == nullptr) - return; - switch(in->type) { case _obj::block_cmd: { @@ -189,12 +188,11 @@ void get_map_cmd(_obj* in, std::map* all_cmds) }; break; default: break; } + return true; } -void get_map_fctname(_obj* in, std::map* fct_map) +bool get_map_fctname(_obj* in, std::map* fct_map) { - if(fct_map == nullptr) - return; switch(in->type) { case _obj::block_function: { @@ -204,9 +202,10 @@ void get_map_fctname(_obj* in, std::map* fct_map) }; break; default: break; } + return true; } -void replace_fctname(_obj* in, std::map* fctmap) +bool replace_fctname(_obj* in, std::map* fctmap) { switch(in->type) { @@ -228,9 +227,10 @@ void replace_fctname(_obj* in, std::map* fctmap) }; break; default: break; } + return true; } -void delete_fcts(_obj* in, std::set* fcts) +bool delete_fcts(_obj* in, std::set* fcts) { switch(in->type) { @@ -253,6 +253,7 @@ void delete_fcts(_obj* in, std::set* fcts) } default: break; } + return true; } /** name things **/ @@ -366,7 +367,7 @@ void delete_unused_fct(_obj* in, std::regex exclude) recurse(delete_fcts, in, &unused); } -void list_stuff(_obj* in, std::regex exclude, void (&fct)(_obj*,std::map*) ) +void list_stuff(_obj* in, std::regex exclude, bool (&fct)(_obj*,std::map*) ) { std::map map; recurse(fct, in, &map); diff --git a/src/parse.cpp b/src/parse.cpp index a200679..e832ef4 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -243,7 +243,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$((", in, size, i) ) // arithmetic operation { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // get arithmetic auto r=parse_arithmetic(in, size, i+3); ret->sa.push_back(r.first); @@ -252,7 +254,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$(", in, size, i) ) // substitution { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // get subshell auto r=parse_subshell(in, size, i+2); ret->sa.push_back(new subshell_subarg(r.first, true)); @@ -261,7 +265,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("${", in, size, i) ) // variable manipulation { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // get manipulation auto r=parse_manipulation(in, size, i+2); ret->sa.push_back(r.first); @@ -273,7 +279,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star if(r.second > i+1) { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // add varname ret->sa.push_back(new variable_subarg(r.first)); j = i = r.second; @@ -302,7 +310,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$((", in, size, i) ) // arithmetic operation { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // get arithmetic auto r=parse_arithmetic(in, size, i+3); ret->sa.push_back(r.first); @@ -311,7 +321,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("$(", in, size, i) ) // substitution { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // get subshell auto r=parse_subshell(in, size, i+2); ret->sa.push_back(new subshell_subarg(r.first, false)); @@ -320,7 +332,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star else if( word_eq("${", in, size, i) ) // variable manipulation { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // get manipulation auto r=parse_manipulation(in, size, i+2); ret->sa.push_back(r.first); @@ -332,7 +346,9 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star if(r.second > i+1) { // add previous subarg - ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); + std::string tmpstr=std::string(in+j, i-j); + if(tmpstr!="") + ret->sa.push_back(new string_subarg(tmpstr)); // add varname ret->sa.push_back(new variable_subarg(r.first)); j = i = r.second; @@ -346,7 +362,8 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star // add string subarg std::string val=std::string(in+j, i-j); - ret->sa.push_back(new string_subarg(val)); + if(val != "") + ret->sa.push_back(new string_subarg(val)); } catch(ztd::format_error& e) diff --git a/src/resolve.cpp b/src/resolve.cpp index 6e19a75..bea02cb 100644 --- a/src/resolve.cpp +++ b/src/resolve.cpp @@ -7,29 +7,45 @@ #include "options.hpp" #include "util.hpp" #include "parse.hpp" +#include "timer.hpp" + +std::vector included; // -- CD STUFF -- std::string pwd() { char buf[2048]; - if(getcwd(buf, 2048) != NULL) + if(getcwd(buf, 2048) == NULL) { - std::string ret=ztd::exec("pwd").first; // getcwd failed: call pwd - ret.pop_back(); - return ret; + throw std::runtime_error("getcwd failed with errno "+std::to_string(errno)); } return std::string(buf); } +bool add_include(std::string const& file) +{ + std::string truepath; + if(file[0] == '/') + truepath = file; + else + truepath=pwd() + '/' + file; + for(auto it: included) + { + if(it == truepath) + return false; + } + included.push_back(truepath); + return true; +} + // returns path to old dir std::string _pre_cd(shmain* parent) { if(parent->is_dev_file() || parent->filename == "") return ""; std::string dir=pwd(); - std::string cddir=ztd::exec("dirname", parent->filename).first; - cddir.pop_back(); + std::string cddir=dirname(parent->filename); if(chdir(cddir.c_str()) != 0) throw std::runtime_error("Cannot cd to '"+cddir+"'"); return dir; @@ -47,8 +63,6 @@ void _cd(std::string const& dir) std::vector> do_include_raw(condlist* cmd, shmain* parent, std::string* ex_dir=nullptr) { std::vector> ret; - if(!g_include) - return ret; ztd::option_set opts = create_include_opts(); std::vector rargs; @@ -73,14 +87,16 @@ std::vector> do_include_raw(condlist* cmd, s for(auto it: rargs) command += it + ' '; command += "; do echo $I ; done"; - std::string inc=ztd::sh(command); + std::string inc=ztd::sh(command); /* takes 1ms */ auto v = split(inc, '\n'); for(auto it: v) { if(opts['f'] || add_include(it)) + { ret.push_back(std::make_pair(it, import_file(it))); + } } if(ex_dir==nullptr) @@ -92,13 +108,10 @@ std::vector> do_include_raw(condlist* cmd, s std::vector do_include_parse(condlist* cmd, shmain* parent) { std::vector ret; - if(!g_include) - return ret; std::string dir; auto incs=do_include_raw(cmd, parent, &dir); - for(auto it: incs) { shmain* sh=parse_text(it.second, it.first); @@ -119,8 +132,6 @@ std::vector do_include_parse(condlist* cmd, shmain* parent) std::pair do_resolve_raw(condlist* cmd, shmain* parent, std::string* ex_dir=nullptr) { std::pair ret; - if(!g_resolve) - return ret; ztd::option_set opts = create_resolve_opts(); std::vector rargs; @@ -169,8 +180,6 @@ std::pair do_resolve_raw(condlist* cmd, shmain* parent std::vector do_resolve_parse(condlist* cmd, shmain* parent) { std::vector ret; - if(!g_resolve) - return ret; std::pair p; try @@ -199,26 +208,27 @@ std::vector do_resolve_parse(condlist* cmd, shmain* parent) // -- OBJECT CALLS -- -std::vector resolve_condlist(condlist* in, shmain* parent) +std::pair< std::vector , bool > resolve_condlist(condlist* in, shmain* parent) { cmd* tc = in->first_cmd(); if(tc == nullptr) - return std::vector(); + return std::make_pair(std::vector(), false); std::string const& strcmd=tc->firstarg_string(); - if(strcmd == "%include") - return do_include_parse(in, parent); - else if(strcmd == "%resolve") - return do_resolve_parse(in, parent); + if(g_include && strcmd == "%include") + return std::make_pair(do_include_parse(in, parent), true); + else if(g_resolve && strcmd == "%resolve") + return std::make_pair(do_resolve_parse(in, parent), true); else - return std::vector(); + return std::make_pair(std::vector(), false); } -std::vector resolve_arg(arg* in, shmain* parent, bool forcequote=false) +std::pair< std::vector , bool > resolve_arg(arg* in, shmain* parent, bool forcequote=false) { std::vector ret; arg* ta=nullptr; + bool has_resolved=false; uint32_t j=0; for(uint32_t i=0 ; isa.size() ; i++) { @@ -234,18 +244,21 @@ std::vector resolve_arg(arg* in, shmain* parent, bool forcequote=false) continue; std::string strcmd=c->firstarg_string(); std::string fulltext; - if(strcmd == "%include") + if(g_include && strcmd == "%include") { for(auto it: do_include_raw(tc, parent) ) fulltext += it.second; } - else if(strcmd == "%resolve") + else if(g_resolve && strcmd == "%resolve") { fulltext = do_resolve_raw(tc, parent).second; } else // skip continue; + // start of resolve + has_resolved = true; + if(tsh->quoted || forcequote) { stringReplace(fulltext, "\"", "\\\""); @@ -301,34 +314,42 @@ std::vector resolve_arg(arg* in, shmain* parent, bool forcequote=false) delete ta; in->sa.resize(0); } - return ret; + return std::make_pair(ret, has_resolved); } // -- RECURSIVE CALL -- -void resolve_recurse(_obj* o, shmain* parent) +bool resolve_recurse(_obj* o, shmain* parent) { switch(o->type) { + // in case of applicable object: + // check every sub-object + // execute resolve manually + // instruct parent resolve to not resolve case _obj::_list : { auto t = dynamic_cast(o); for(uint32_t i=0 ; icls.size() ; i++) { - std::vector r=resolve_condlist(t->cls[i], parent); - if(r.size()>0) + auto r=resolve_condlist(t->cls[i], parent); + if(r.second) { // add new cls after current - t->cls.insert(t->cls.begin()+i+1, r.begin(), r.end()); + t->cls.insert(t->cls.begin()+i+1, r.first.begin(), r.first.end()); // delete current delete t->cls[i]; t->cls.erase(t->cls.begin()+i); - // back to previous object - i--; + // skip to after inserted cls + i += r.first.size()-1; + } + else + { + resolve(t->cls[i], parent); } } - // list + return false; } break; case _obj::_arglist : { @@ -336,24 +357,33 @@ void resolve_recurse(_obj* o, shmain* parent) for(uint32_t i=0 ; iargs.size() ; i++) { auto r=resolve_arg(t->args[i], parent); - if(r.size()>0) + if(r.first.size()>0) { // add new args - t->args.insert(t->args.begin()+i+1, r.begin(), r.end()); + t->args.insert(t->args.begin()+i+1, r.first.begin(), r.first.end()); // delete current delete t->args[i]; t->args.erase(t->args.begin()+i); - i += r.size()-1; + i += r.first.size()-1; + } + else + { + resolve(t->args[i], parent); } } - // arglist - return; + return false; } break; case _obj::block_cmd : { auto t = dynamic_cast(o); - for(auto it: t->var_assigns) + for(auto it: t->var_assigns) // var assigns + { resolve_arg(it.second, parent, true); // force quoted + resolve(it.second, parent); + } + resolve(t->redirs, parent); + resolve(t->args, parent); + return false; }; break; case _obj::block_case : { @@ -361,18 +391,27 @@ void resolve_recurse(_obj* o, shmain* parent) for(auto sc: t->cases) { resolve_arg(t->carg, parent, true); // force quoted + resolve(t->carg, parent); + + for(auto it: sc.first) { - for(auto it: sc.first) - resolve_arg(it, parent, true); // force quoted + resolve_arg(it, parent, true); // force quoted + resolve(it, parent); } + resolve(sc.second, parent); } }; break; default: break; } - return; + return true; } // recursive call of resolve +void resolve(_obj* in, shmain* parent) +{ + recurse(resolve_recurse, in, parent); +} + void resolve(shmain* sh) { recurse(resolve_recurse, sh, sh); diff --git a/src/util.cpp b/src/util.cpp index 771d0dc..10a0214 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -20,6 +20,24 @@ std::string indent(int n) return ret; } +std::string basename(std::string const& in) +{ + size_t slr=in.rfind('/'); + if(slr != std::string::npos) + return in.substr(slr); + else + return in; +} + +std::string dirname(std::string const& in) +{ + size_t slr=in.rfind('/'); + if(slr != std::string::npos) + return in.substr(0,slr); + else + return "."; +} + std::vector split(std::string const& in, const char* splitters) { uint32_t i=0,j=0; @@ -229,8 +247,7 @@ int execute(shmain* sh, std::vector& args) { std::string data=sh->generate(); - std::string filename=ztd::exec("basename", args[0]).first; - filename.pop_back(); + std::string filename = basename(args[0]); // generate path std::string tmpdir = (getenv("TMPDIR") != NULL) ? getenv("TMPDIR") : "/tmp" ;