From 0b46581b227da56767cee75110db0e314ad9dace Mon Sep 17 00:00:00 2001 From: zawwz Date: Thu, 28 Oct 2021 13:33:06 +0200 Subject: [PATCH] extend minify quotes to var assigns --- include/struc.hpp | 9 +- src/minify.cpp | 255 ++++++++++++++++++++++++++++----------------- src/parse.cpp | 1 + src/processing.cpp | 1 + 4 files changed, 168 insertions(+), 98 deletions(-) diff --git a/include/struc.hpp b/include/struc.hpp index 2e5f3d0..70d7062 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -170,9 +170,9 @@ public: class arg : public _obj { public: - arg() { type=_obj::_arg; } - arg(std::string const& str) { type=_obj::_arg; this->set(str);} - arg(subarg* in) { type=_obj::_arg; sa.push_back(in); } + arg() { type=_obj::_arg; forcequoted=false; } + arg(std::string const& str, bool fquote=false) { type=_obj::_arg; this->set(str); forcequoted=fquote; } + arg(subarg* in, bool fquote=false) { type=_obj::_arg; sa.push_back(in); forcequoted=fquote; } ~arg() { for(auto it: sa) delete it; } void set(std::string const& str); @@ -187,6 +187,9 @@ public: std::vector<subarg*> sa; + // is forcequoted: var assign + bool forcequoted; + bool is_string(); // return if is a string and only one subarg std::string string(); diff --git a/src/minify.cpp b/src/minify.cpp index 18a3123..65601f5 100644 --- a/src/minify.cpp +++ b/src/minify.cpp @@ -69,8 +69,20 @@ bool r_replace_var(_obj* in, strmap_t* varmap) return true; } -const char* singlequote_escape_char=" \\\t!\"()|&*?~><#"; -const char* doublequote_escape_char=" \t'|&\\*()?~><#"; +const char* singlequote_escape_char=" \\\t!\"()|&*?~><#$"; +const char* doublequote_escape_char=" \t'|&\\*()?~><#$"; +uint32_t count_escape_char(std::string& in, uint32_t i, bool doublequote, std::string** estr, uint32_t* ei) { + if( ( doublequote && is_in(in[i], doublequote_escape_char) ) || + ( !doublequote && is_in(in[i], singlequote_escape_char) ) ) { + *estr = &in; + *ei = i; + return 1; + } + else if(in[i] == '\n') // \n: can't remove quotes + return 2; + 
return 0; +} + uint32_t count_escape_chars(std::string const& in, bool doublequote) { uint32_t r=0; @@ -105,88 +117,161 @@ bool is_this_quote(char c, bool is_doublequote) return c == '\''; } -void do_one_minify_quotes(string_subarg* in, bool prev_is_var, bool start_quoted) +bool is_varname(const char c) { + return is_alphanum(c) || c == '_'; +} + +void do_minify_quotes(arg* in) { - std::string& val = in->val; - if(val.size() <= 1) - return; - if(start_quoted) // don't handle start quoted for now - return; - if(val[0] == '"' && prev_is_var && (is_alphanum(val[1]) || val[1] == '_') ) // removing quote would change varname: skip - return; - if(val[0] == '\'' && prev_is_var && (is_alphanum(val[1]) || val[1] == '_') ) // removing quote would change varname: skip - return; - - uint32_t i=0, j=0; - while( i < val.size() ) + auto t = in->sa.begin(); + // global loop + while(true) { - bool doublequote=false; - while(i=val.size()) // end before finding quote: exit - return; - if(val[i] == '"') - doublequote=true; - - j=i; - i++; - - if(doublequote) - { - while(isa.end()) + return; + while((*t)->type != _obj::subarg_string) + { + // previous is alphanum var: removing quote can change varname + if((*t)->type == _obj::subarg_variable) { + variable_subarg* vs = dynamic_cast(*t); + if(vs->var != nullptr && !vs->var->is_manip && vs->var->varname.size()>0 && !(is_in(vs->var->varname[0], SPECIAL_VARS) || is_num(vs->var->varname[0]) ) ) + prev_is_var = true; + } + else + prev_is_var = false; + t++; + // quit when reached end of arg + if(t == in->sa.end()) + return; + i=0; + } + std::string& val = dynamic_cast(*t)->val; + while(isa.end()) + return; + while((*t)->type != _obj::subarg_string) + { + // previous is alphanum var: removing quote can change varname + if((*t)->type == _obj::subarg_variable) { + variable_subarg* vs = dynamic_cast(*t); + if(vs->var != nullptr && !vs->var->is_manip && vs->var->varname.size()>0 && !(is_in(vs->var->varname[0], SPECIAL_VARS) || 
is_num(vs->var->varname[0]) ) ) + end_is_var = true; + } + else + end_is_var = false; + has_substitution=true; + t++; + // quit when reached end of arg + if(t == in->sa.end()) + return; + i=0; + } + std::string& val = dynamic_cast(*t)->val; + if(doublequote) + { + while(i=val.size()) { // end before finding quote: continue looping + t++; + i=0; + continue; + } + } + else + { + while(i=val.size()) { // end before finding quote: continue looping + t++; + i=0; + continue; + } + } + strend=&val; + quoteend=i; + break; + } // end of quote end loop + // has a substitution that can expand: don't dequote + if(!in->forcequoted && has_substitution) { i++; + continue; } - if(i>=val.size()) // end before finding quote: exit - return; - } - else - { - while(i 1) { i++; - if(i>=val.size()) // end before finding quote: exit - return; + continue; + } + // removing quotes changes variable name: don't dequote + if( ( prev_is_var && quotestart == 0 && strstart->size()>1 && is_varname((*strstart)[1]) ) || + ( end_is_var && quoteend == 0 && strend->size()>1 && is_varname((*strend)[1])) ) { + i++; + continue; + } + + // prev char is a $ would create variable names: don't dequote + if( quotestart >= 1 && (*strstart)[quotestart-1] == '$' && (!doublequote || + ( strstart->size()>2 && is_varname((*strstart)[quotestart+1]))) + ) { + i++; + continue; + } + + // do dequote + strend->erase(quoteend, 1); + // needs one escape + if(ce == 1) { + escapestr->insert(escapepos, "\\"); + } + strstart->erase(quotestart, 1); } - uint32_t ce = count_escape_chars(val.substr(j+1, i-j-1), doublequote); - if(ce == 0) - { - val.erase(val.begin()+i); - val.erase(val.begin()+j); - } - else if(ce == 1) // only one char to escape: can save some space - { - val.erase(val.begin()+i); - val.erase(val.begin()+j); - uint32_t k; - if(doublequote) - { - for(k=j; k(in); - for(uint32_t i=0; isa.size(); i++) - { - // iterate subargs - if(t->sa[i]->type == _obj::subarg_string) - { - // has to be a string - string_subarg* ss 
= dynamic_cast<string_subarg*>(t->sa[i]); - bool prev_is_var=false; - if(i>0 && t->sa[i-1]->type == _obj::subarg_variable) - { - // previous subarg is a direct variable (removing a quote could change variable name) - variable_subarg* vs = dynamic_cast<variable_subarg*>(t->sa[i-1]); - if(vs->var != nullptr && vs->var->is_manip == false && vs->var->varname.size()>0 && !(is_in(vs->var->varname[0], SPECIAL_VARS) || is_alpha(vs->var->varname[0]) ) ) - prev_is_var=true; - } - if(t->sa.size()==1 && (ss->val=="\"\"" || ss->val=="''") ) // single argument as "" or '': don't minify - continue; - do_one_minify_quotes(ss, prev_is_var, i>0 && t->sa[i-1]->quoted); - } - //if() - } + do_minify_quotes(t); }; break; case _obj::_redirect: { // for redirects: don't minify quotes on here documents diff --git a/src/parse.cpp b/src/parse.cpp index 1ca1f9b..ade4367 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -1267,6 +1267,7 @@ parse_context parse_cmd_varassigns(cmd* in, parse_context ctx, bool cmdassign=fa ctx=pp.second; } ta->insert(0, strop); + ta->forcequoted = !cmdassign; ret->push_back(std::make_pair(vp.first, ta)); ctx.i=skip_chars(ctx, SPACES); } diff --git a/src/processing.cpp b/src/processing.cpp index e047fc4..c1a5291 100644 --- a/src/processing.cpp +++ b/src/processing.cpp @@ -659,6 +659,7 @@ std::string gen_json_struc(_obj* o) { arg* t = dynamic_cast<arg*>(o); vec.push_back(std::make_pair(quote_string("type"), quote_string("arg") ) ); + vec.push_back(std::make_pair(quote_string("forcequoted"), boolstring(t->forcequoted))); std::vector<std::string> tvec; for(auto it: t->sa) tvec.push_back(gen_json_struc(it));