// NOTE(review): the text of this span looks damaged by extraction. Line breaks are collapsed and
// every "<...>" run seems to have been dropped. Visible symptoms: `dynamic_cast(in)` and
// `std::vector` carry no template argument, `*estr = ∈` is presumably a mangled `&in;`, and the
// loop headers in subarg_vars(), count_escape_chars(), do_minify_quotes() and do_minify_dollar()
// are cut. The end of do_minify_dollar() and the signature of the callback that dispatches on
// _obj::arg / _obj::subarg_string / _obj::redirect are missing as well.
// Restore this span from version control before trying to build it.
//
// What the readable parts do:
//  - cmd_t::subarg_vars(): returns an empty vector when args is null or empty. When is_argvar()
//    holds, it collects ta->sa[0] of each visited argument whose first subarg is a subarg_string
//    and whose generated text passes is_varname().
//  - r_replace_fct(): recursion callback. Renames a block_function whose name is a key of fctmap,
//    and replaces argument 0 of a block_cmd whose arg_string(0) is a key of fctmap (the old
//    argument is deleted, a new arg_t is built from the mapped name). Always returns true.
//  - r_replace_var(): recursion callback. Renames a variable node whose varname is a key of
//    varmap. Always returns true.
//  - singlequote_escape_char / doublequote_escape_char: character sets tested with is_in() by the
//    escape counters below.
//  - count_escape_char(): returns 1 (and stores the string and the index through estr / ei) when
//    in[i] belongs to the set selected by doublequote, 2 when in[i] is a newline, 0 otherwise.
//  - count_escape_chars(): string-wide counter. In the readable tail, a following character that
//    is in SPECIAL_VARS, alphanumeric, '_' or '(' makes it return 2 for double quotes and
//    increments the count otherwise. The beginning of its loop is missing.
//  - is_this_quote(): true when c is a double quote (is_doublequote) or a single quote (otherwise).
//  - is_varname(char): true for alphanumerics and '_'.
//  - do_minify_quotes(): walks the subargs of an arg_t and erases a pair of quotes unless one of
//    the visible guards applies: a substitution inside the quotes of an arg that is not
//    forcequoted, more than one escape needed, an adjacent variable whose name would be extended,
//    or a preceding '$'. When exactly one escape is needed a backslash is inserted at the
//    recorded position.
//  - quote-minifying callback (signature lost): runs do_minify_quotes() on arg nodes and
//    do_minify_dollar() on subarg_string nodes. For a redirect with a here document it recurses
//    into the target and into the non-string subargs of the here document only, then returns
//    false.
//  - nchar(): maps 0-25 to 'a'-'z', 26-51 to 'A'-'Z', 52 to '_', 53-62 to '0'-'9', else 0.
//  - minimal_name(): a single nchar(n) when n<53. Otherwise nchar(n%53) followed by nchar(n/53),
//    then further characters while the quotient exceeds 64.
//    NOTE(review): nchar() returns 0 for values >= 63, so n/53 >= 63 appends a NUL character.
//    Confirm whether that many names can occur.
#include "minify.hpp" #include "parse.hpp" #include "recursive.hpp" #include "processing.hpp" #include "util.hpp" std::vector cmd_t::subarg_vars() { std::vector ret; if(args==nullptr || args->size()<=0) return ret; if(this->is_argvar()) { for(uint32_t i=1; isize(); i++) { arg_t* ta = args->args[i]; if(ta->sa.size() < 1 || ta->sa[0]->type != _obj::subarg_string) continue; if(ta->sa.size() >= 1 && is_varname(ta->sa[0]->generate(0))) ret.push_back(ta->sa[0]); } } return ret; } /** RECURSIVES **/ bool r_replace_fct(_obj* in, strmap_t* fctmap) { switch(in->type) { case _obj::block_function: { function_t* t = dynamic_cast(in); auto el=fctmap->find(t->name); if(el!=fctmap->end()) t->name = el->second; }; break; case _obj::block_cmd: { cmd_t* t = dynamic_cast(in); std::string cmdname = t->arg_string(0); auto el=fctmap->find(cmdname); if(el!=fctmap->end()) { delete t->args->args[0]; t->args->args[0] = new arg_t(el->second); } }; break; default: break; } return true; } bool r_replace_var(_obj* in, strmap_t* varmap) { switch(in->type) { case _obj::variable: { variable_t* t = dynamic_cast(in); auto el=varmap->find(t->varname); if(el!=varmap->end()) t->varname = el->second; }; break; default: break; } return true; } const char* singlequote_escape_char=" \\\t!\"()|&*?~><#$"; const char* doublequote_escape_char=" \t'|&\\*()?~><#$"; uint32_t count_escape_char(std::string& in, uint32_t i, bool doublequote, std::string** estr, uint32_t* ei) { if( ( doublequote && is_in(in[i], doublequote_escape_char) ) || ( !doublequote && is_in(in[i], singlequote_escape_char) ) ) { *estr = ∈ *ei = i; return 1; } else if(in[i] == '\n') // \n: can't remove quotes return 2; return 0; } uint32_t count_escape_chars(std::string const& in, bool doublequote) { uint32_t r=0; for(uint32_t i=0; i=in.size()) continue; else if(is_in(in[i+1], SPECIAL_VARS) || is_alphanum(in[i+1]) || in[i+1] == '_' || in[i+1] == '(') { if(doublequote) // doublequote: can't remove otherwise not quoted var return 2; r++; } } } 
return r; } bool is_this_quote(char c, bool is_doublequote) { if(is_doublequote) return c == '"'; else return c == '\''; } bool is_varname(const char c) { return is_alphanum(c) || c == '_'; } void do_minify_quotes(arg_t* in) { auto t = in->sa.begin(); // global loop while(true) { uint32_t i=0; // one iteration loop while(true) { bool doublequote=false; bool prev_is_var=false; bool end_is_var=false; bool has_substitution=false; std::string* strstart = nullptr; uint32_t quotestart=0; std::string* strend = nullptr; uint32_t quoteend=0; std::string* escapestr = nullptr; uint32_t escapepos=0; uint32_t ce=0; // loop to find start of quote while(true) { // reached end: quit if(t == in->sa.end()) return; while((*t)->type != _obj::subarg_string) { // previous is alphanum var: removing quote can change varname if((*t)->type == _obj::subarg_variable) { subarg_variable_t* vs = dynamic_cast(*t); if(vs->var != nullptr && !vs->var->is_manip && vs->var->varname.size()>0 && !(is_in(vs->var->varname[0], SPECIAL_VARS) || is_num(vs->var->varname[0]) ) ) prev_is_var = true; } else prev_is_var = false; t++; // quit when reached end of arg if(t == in->sa.end()) return; i=0; } std::string& val = dynamic_cast(*t)->val; while(isa.end()) return; while((*t)->type != _obj::subarg_string) { // previous is alphanum var: removing quote can change varname if((*t)->type == _obj::subarg_variable) { subarg_variable_t* vs = dynamic_cast(*t); if(vs->var != nullptr && !vs->var->is_manip && vs->var->varname.size()>0 && !(is_in(vs->var->varname[0], SPECIAL_VARS) || is_num(vs->var->varname[0]) ) ) end_is_var = true; } else end_is_var = false; has_substitution=true; t++; // quit when reached end of arg if(t == in->sa.end()) return; i=0; } std::string& val = dynamic_cast(*t)->val; if(doublequote) { while(i=val.size()) { // end before finding quote: continue looping t++; i=0; continue; } } else { while(i=val.size()) { // end before finding quote: continue looping t++; i=0; continue; } } strend=&val; 
quoteend=i; break; } // end of quote end loop // has a substitution that can expand: don't dequote if(!in->forcequoted && has_substitution) { i++; continue; } // too many escapes: don't dequote if(ce > 1) { i++; continue; } // removing quotes changes variable name: don't dequote if( ( prev_is_var && quotestart == 0 && strstart->size()>1 && is_varname((*strstart)[1]) ) || ( end_is_var && quoteend == 0 && strend->size()>1 && is_varname((*strend)[1])) ) { i++; continue; } // prev char is a $ would create variable names: don't dequote if( quotestart >= 1 && (*strstart)[quotestart-1] == '$' && (!doublequote || ( strstart->size()>2 && is_varname((*strstart)[quotestart+1]))) ) { i++; continue; } // do dequote strend->erase(quoteend, 1); // needs one escape if(ce == 1) { escapestr->insert(escapepos, "\\"); } strstart->erase(quotestart, 1); } } } void do_minify_dollar(subarg_string_t* in) { std::string& val = in->val; for(uint32_t i=0; itype) { case _obj::arg: { arg_t* t = dynamic_cast(in); do_minify_quotes(t); }; break; case _obj::subarg_string: { subarg_string_t* t = dynamic_cast(in); do_minify_dollar(t); }; break; case _obj::redirect: { // for redirects: don't minify quotes on here documents redirect_t* t = dynamic_cast(in); if(t->here_document != nullptr) { recurse(r_minify_useless_quotes, t->target); for(auto it: t->here_document->sa) { if(it->type!=_obj::subarg_string) { recurse(r_minify_useless_quotes, it); } } // don't recurse on the rest return false; } } break; default: break; } return true; } /** NAME MINIFYING **/ char nchar(uint32_t n) { if(n<26) return 'a'+n; else if(n<52) return 'A'+(n-26); else if(n==52) return '_'; else if(n<63) return '0'+(n-53); else return 0; } std::string minimal_name(uint32_t n) { if(n<53) { std::string ret; ret += nchar(n); return ret; } else { uint32_t k=n%53; uint32_t q=n/53; std::string ret; ret += nchar(k); ret += nchar(q); while(q>64) { q /= 64; ret += nchar(q); } return ret; } } // vars: input variables // excluded: excluded 
variables to make sure there is no collision strmap_t gen_minimal_map(countmap_t const& vars, set_t const& excluded) { strmap_t ret; auto ordered = sort_by_value(vars); uint32_t n=0; for(std::pair it: ordered) { std::string newname; do { newname = minimal_name(n); n++; } while( excluded.find(newname) != excluded.end() ); ret.insert(std::make_pair(it.first, newname)); } return ret; } // calls void minify_var(_obj* in, std::regex const& exclude) { // countmap_t vars; set_t excluded; strmap_t varmap; // get vars varmap_get(in, exclude); // concatenate excluded and reserved concat_sets(excluded, m_excluded_var); concat_sets(excluded, all_reserved_words); // create mapping varmap=gen_minimal_map(m_vars, excluded); // perform replace recurse(r_replace_var, in, &varmap); require_rescan_var(); } void minify_fct(_obj* in, std::regex const& exclude) { // countmap_t fcts, cmdmap; set_t excluded, unsets; strmap_t fctmap; // get fcts and cmds fctcmdmap_get(in, exclude, regex_null); recurse(r_get_unsets, in, &unsets); // concatenate cmds, excluded and reserved excluded=map_to_set(m_cmds); exclude_sets(excluded, map_to_set(m_fcts)); concat_sets(excluded, m_excluded_fct); concat_sets(excluded, unsets); concat_sets(excluded, all_reserved_words); // create mapping m_fcts = combine_common(m_fcts, m_cmds); fctmap=gen_minimal_map(m_fcts, excluded); // perform replace recurse(r_replace_fct, in, &fctmap); require_rescan_fct(); require_rescan_cmd(); } bool delete_unused_fct(_obj* in, std::regex const& exclude) { set_t unused; // get fcts and cmds fctcmdmap_get(in, exclude, regex_null); // find unused fcts for(auto it: m_fcts) { if(m_cmds.find(it.first) == m_cmds.end()) unused.insert(it.first); } // perform deletion if(unused.size()>0) { recurse(r_delete_fct, in, &unused); require_rescan_all(); return true; } else return false; } bool delete_unused_var(_obj* in, std::regex const& exclude) { set_t unused; // get fcts and cmds varmap_get(in, exclude); // find unused vars for(auto it: 
m_vardefs) { if(it.first!="" && m_varcalls.find(it.first) == m_varcalls.end()) unused.insert(it.first); } // perform deletion if(unused.size()>0) { recurse(r_delete_var, in, &unused); require_rescan_all(); return true; } else return false; } bool delete_unused_both(_obj* in, std::regex const& var_exclude, std::regex const& fct_exclude) { set_t unused_var, unused_fct; // get all allmaps_get(in, var_exclude, fct_exclude, regex_null); // find unused for(auto it: m_vardefs) { if(it.first!="" && m_varcalls.find(it.first) == m_varcalls.end()) unused_var.insert(it.first); } for(auto it: m_fcts) { if(m_cmds.find(it.first) == m_cmds.end()) unused_fct.insert(it.first); } if(unused_var.size()>0 || unused_fct.size()>0) { recurse(r_delete_varfct, in, &unused_var, &unused_fct); require_rescan_all(); return true; } return false; } void delete_unused(_obj* in, std::regex const& var_exclude, std::regex const& fct_exclude) { while(delete_unused_both(in, var_exclude, fct_exclude)); // keep deleting until both no deletion } // minify ${var} to $var bool r_minify_empty_manip(_obj* in) { switch(in->type) { case _obj::arg: { arg_t* t = dynamic_cast(in); for(uint32_t i=0; isa.size(); i++) { if(t->sa[i]->type == _obj::subarg_variable) { // has to be a variable subarg_variable_t* ss = dynamic_cast(t->sa[i]); if(ss->var->is_manip) { // if is a manip: possibility to skip it if(ss->var->index != nullptr) // is a var bash array: skip return true; if(i+1sa.size() && t->sa[i+1]->type == _obj::subarg_string) { // if next subarg is a string: check its first char subarg_string_t* ss = dynamic_cast(t->sa[i+1]); char c = ss->val[0]; // if its first would extend the var name: skip if(is_alphanum(c) || c == '_') continue; } // if has no actual manipulation operation: set it to not manip if(ss->var->manip == nullptr || ss->var->manip->sa.size() == 0) ss->var->is_manip = false; } } } }; break; default: break; } return true; } block_t* do_one_minify_single_block(block_t* in) { block_t* ret=nullptr; list_t* 
l=nullptr; if(in->type == _obj::block_brace) l = dynamic_cast(in)->lst; else if(in->type == _obj::block_subshell) l = dynamic_cast(in)->lst; if(l == nullptr) return nullptr; // not a single cmd/block: not applicable if(l->cls.size() != 1 || l->cls[0]->pls.size() != 1 || l->cls[0]->pls[0]->cmds.size() != 1) return nullptr; ret = l->cls[0]->pls[0]->cmds[0]; // if is a subshell and has some env set: don't remove it if(in->type == _obj::block_subshell && has_env_set(ret)) return nullptr; return ret; } bool r_minify_single_block(_obj* in) { switch(in->type) { case _obj::pipeline: { bool has_operated=false; do { // loop operating on current // (if has operated, current object has changed) has_operated=false; pipeline_t* t = dynamic_cast(in); for(uint32_t i=0; icmds.size(); i++) { block_t* ret = do_one_minify_single_block(t->cmds[i]); if(ret != nullptr) { // concatenate redirects for(uint32_t j=0; jcmds[i]->redirs.size(); j++) ret->redirs.insert(ret->redirs.begin()+j, t->cmds[i]->redirs[j]); // deindex t->cmds[i]->redirs.resize(0); if(t->cmds[i]->type == _obj::block_brace) dynamic_cast(t->cmds[i])->lst->cls[0]->pls[0]->cmds[0] = nullptr; else if(t->cmds[i]->type == _obj::block_subshell) dynamic_cast(t->cmds[i])->lst->cls[0]->pls[0]->cmds[0] = nullptr; // replace value delete t->cmds[i]; t->cmds[i] = ret; has_operated=true; } } } while(has_operated); }; break; default: break; } return true; } bool r_has_backtick(_obj* in, bool* r) { if(*r) return false; switch(in->type) { case _obj::subarg_subshell: { subarg_subshell_t* t = dynamic_cast(in); if(t->backtick) { *r = true; return false; } }; break; default: break; } return true; } bool r_minify_backtick(_obj* in) { switch(in->type) { case _obj::subarg_subshell: { subarg_subshell_t* t = dynamic_cast(in); if(!t->backtick) { bool has_backtick_child=false; recurse(r_has_backtick, t->sbsh, &has_backtick_child); if(has_backtick_child) return false; t->backtick = true; } return false; }; break; default: break; } return true; } // 
optimisation for processors that don't have recurse-cancellation bool r_minify(_obj* in) { r_minify_empty_manip(in); r_minify_single_block(in); r_do_string_processor(in); return true; } void minify_generic(_obj* in) { recurse(r_minify, in); recurse(r_minify_backtick, in); recurse(r_minify_useless_quotes, in); }