lxsh/src/minify.cpp
2021-11-03 11:41:42 +01:00

667 lines
17 KiB
C++

#include "minify.hpp"
#include "parse.hpp"
#include "recursive.hpp"
#include "processing.hpp"
#include "util.hpp"
std::vector<subarg_t*> cmd_t::subarg_vars()
{
std::vector<subarg_t*> ret;
if(args==nullptr || args->size()<=0)
return ret;
if(this->is_argvar())
{
for(uint32_t i=1; i<args->size(); i++)
{
arg_t* ta = args->args[i];
if(ta->sa.size() < 1 || ta->sa[0]->type != _obj::subarg_string)
continue;
if(ta->sa.size() >= 1 && is_varname(ta->sa[0]->generate(0)))
ret.push_back(ta->sa[0]);
}
}
return ret;
}
/** RECURSIVES **/
// Recursion callback: renames functions according to `fctmap`.
//
// A function definition whose name is a key of the map gets the mapped name.
// A command whose first argument string is a key of the map gets that
// argument replaced by a freshly allocated arg_t built from the mapped name
// (the old argument object is deleted).
// Always returns true.
bool r_replace_fct(_obj* in, strmap_t* fctmap)
{
  if(in->type == _obj::block_function)
  {
    function_t* fct = dynamic_cast<function_t*>(in);
    auto hit = fctmap->find(fct->name);
    if(hit != fctmap->end())
      fct->name = hit->second;
  }
  else if(in->type == _obj::block_cmd)
  {
    cmd_t* cmd = dynamic_cast<cmd_t*>(in);
    const std::string callee = cmd->arg_string(0);
    auto hit = fctmap->find(callee);
    if(hit != fctmap->end())
    {
      // swap the whole first argument for the new name
      delete cmd->args->args[0];
      cmd->args->args[0] = new arg_t(hit->second);
    }
  }
  return true;
}
// Recursion callback: renames variables according to `varmap`.
//
// Every variable object whose varname is a key of the map is given the mapped
// name; all other objects are left alone. Always returns true.
bool r_replace_var(_obj* in, strmap_t* varmap)
{
  if(in->type == _obj::variable)
  {
    variable_t* var = dynamic_cast<variable_t*>(in);
    auto hit = varmap->find(var->varname);
    if(hit != varmap->end())
      var->varname = hit->second;
  }
  return true;
}
// Characters counted as "needs a backslash" by count_escape_char() and
// count_escape_chars() when the quote being examined is NOT a double quote.
const char* singlequote_escape_char=" \\\t!\"()|&*?~><#$";
// Same role, used when the quote being examined is a double quote.
const char* doublequote_escape_char=" \t'|&\\*()?~><#$";
// Classifies the single character in[i] for quote removal.
//
// in          string being scanned
// i           index of the character to classify
// doublequote selects doublequote_escape_char (true) or
//             singlequote_escape_char (false) as the lookup table
// estr, ei    out-parameters: when the character is in the table they receive
//             the address of `in` and the index `i`; untouched otherwise
//
// Returns 1 when the character is in the table, 2 when it is a newline
// (quotes cannot be removed), 0 otherwise.
uint32_t count_escape_char(std::string& in, uint32_t i, bool doublequote, std::string** estr, uint32_t* ei) {
  const char cur = in[i];
  const char* table = doublequote ? doublequote_escape_char : singlequote_escape_char;
  if(is_in(cur, table)) {
    // remember where the backslash would have to go
    *estr = &in;
    *ei = i;
    return 1;
  }
  return cur == '\n' ? 2 : 0;
}
// Whole-string counterpart of count_escape_char().
//
// Walks `in` and counts the characters found in the escape table selected by
// `doublequote`. Returns 2 as soon as a newline is met, or when a '$' that
// starts a variable/substitution is met while `doublequote` is true; otherwise
// returns the accumulated count.
//
// NOTE(review): both escape tables contain '$', so if is_in() is a plain
// membership test the '$' handling below is never reached — confirm.
uint32_t count_escape_chars(std::string const& in, bool doublequote)
{
  const char* table = doublequote ? doublequote_escape_char : singlequote_escape_char;
  uint32_t total = 0;
  for(uint32_t pos = 0; pos < in.size(); pos++)
  {
    const char c = in[pos];
    if(is_in(c, table))
    {
      total++;
      continue;
    }
    if(c == '\n') // \n: can't remove quotes
      return 2;
    if(c != '$' || pos+1 >= in.size())
      continue;

    const char next = in[pos+1];
    if(is_in(next, SPECIAL_VARS) || is_alphanum(next) || next == '_' || next == '(')
    {
      // doublequote: removing the quotes would leave the variable unquoted
      if(doublequote)
        return 2;
      total++;
    }
  }
  return total;
}
// Tells whether `c` is the quote character of the requested kind:
// '"' when is_doublequote is true, '\'' otherwise.
bool is_this_quote(char c, bool is_doublequote)
{
  const char wanted = is_doublequote ? '"' : '\'';
  return c == wanted;
}
// Single-character overload: true when `c` is an underscore or passes
// is_alphanum().
bool is_varname(const char c) {
  if(c == '_')
    return true;
  return is_alphanum(c);
}
// Strips quote pairs from the string sub-arguments of `in` when the text
// reads the same without them.
//
// The argument is scanned left to right. For each quoted section (which may
// start in one string sub-argument and end in a later one) the quotes are
// kept when:
//  - the section spans a non-string sub-argument and in->forcequoted is false
//  - more than one character would need a backslash, or it contains a newline
//  - removing a quote would glue a word character onto a preceding bare $name
//  - the opening quote directly follows a literal '$'
// Otherwise both quote characters are erased and, when exactly one character
// needs protecting, a backslash is inserted in front of it.
void do_minify_quotes(arg_t* in)
{
  using sa_iter = decltype(in->sa.begin());

  // True when the sub-argument right before `it` is a variable written as a
  // bare $name: not a manipulation, and its name does not start with a
  // special-variable character or a digit. A word character placed directly
  // after such a variable would be read as part of its name.
  auto follows_plain_var = [in](sa_iter it) -> bool {
    if(it == in->sa.begin())
      return false;
    auto prev = *(it-1);
    if(prev->type != _obj::subarg_variable)
      return false;
    subarg_variable_t* vs = dynamic_cast<subarg_variable_t*>(prev);
    return vs->var != nullptr && !vs->var->is_manip && vs->var->varname.size()>0
      && !( is_in(vs->var->varname[0], SPECIAL_VARS) || is_num(vs->var->varname[0]) );
  };

  auto t = in->sa.begin();
  uint32_t i=0;
  // every pass handles one quoted section, then resumes right after it
  while(true)
  {
    bool doublequote=false;
    bool has_substitution=false;
    std::string* strstart = nullptr;
    uint32_t quotestart=0;
    std::string* strend = nullptr;
    uint32_t quoteend=0;
    std::string* escapestr = nullptr;
    uint32_t escapepos=0;
    uint32_t ce=0;
    auto tstart = t;

    // loop to find start of quote
    while(true)
    {
      // reached end: quit
      if(t == in->sa.end())
        return;
      // only string sub-arguments can hold quote characters
      while((*t)->type != _obj::subarg_string)
      {
        t++;
        // quit when reached end of arg
        if(t == in->sa.end())
          return;
        i=0;
      }
      std::string& val = dynamic_cast<subarg_string_t*>(*t)->val;
      while(i<val.size() && !( val[i] == '\'' || val[i] == '"') )
      {
        // a backslash hides the next character from the quote search
        if(val[i] == '\\')
          i++;
        i++;
      }
      // if found: remember it and go to next step
      if(i<val.size()) {
        doublequote = (val[i] == '"');
        strstart=&val;
        quotestart=i;
        tstart=t;
        i++;
        break;
      }
      t++;
      i=0;
    } // end of quote start loop

    // loop to end of quote
    while(true)
    {
      // reached end: quit
      if(t == in->sa.end())
        return;
      while((*t)->type != _obj::subarg_string)
      {
        // the quoted section contains something other than plain text
        has_substitution=true;
        t++;
        // quit when reached end of arg
        if(t == in->sa.end())
          return;
        i=0;
      }
      std::string& val = dynamic_cast<subarg_string_t*>(*t)->val;
      if(doublequote)
      {
        while(i<val.size() && val[i] != '"')
        {
          if(val[i] == '\\') {
            // \" is already the correct unquoted spelling of a literal quote:
            // keep the pair untouched. Counting its backslash as a character
            // to escape would turn it into \\" and reopen a quote.
            if(i+1<val.size() && val[i+1] == '"') {
              i+=2;
              continue;
            }
            ce += count_escape_char(val, i++, doublequote, &escapestr, &escapepos);
          }
          ce += count_escape_char(val, i++, doublequote, &escapestr, &escapepos);
        }
      }
      else
      {
        while(i<val.size() && val[i] != '\'')
          ce += count_escape_char(val, i++, doublequote, &escapestr, &escapepos);
      }
      if(i>=val.size()) { // end before finding quote: continue looping
        t++;
        i=0;
        continue;
      }
      strend=&val;
      quoteend=i;
      break;
    } // end of quote end loop

    // has a substitution that can expand: don't dequote
    if(!in->forcequoted && has_substitution) {
      i++;
      continue;
    }
    // too many escapes: don't dequote
    if(ce > 1) {
      i++;
      continue;
    }

    // Index, in the opening string, of the character that ends up where the
    // opening quote was. For an empty pair ("" or '') that is the character
    // past the closing quote, not the closing quote itself.
    uint32_t after_open = quotestart+1;
    if(strstart == strend && quoteend == after_open)
      after_open++;
    const bool start_extends = after_open < strstart->size() && is_varname((*strstart)[after_open]);
    const bool end_extends = quoteend+1 < strend->size() && is_varname((*strend)[quoteend+1]);

    // removing quotes changes variable name: don't dequote
    // (the neighbouring sub-argument is looked up directly, so the answer
    // stays right after an earlier pass already edited this same string)
    if( ( quotestart == 0 && start_extends && follows_plain_var(tstart) ) ||
        ( quoteend == 0 && end_extends && follows_plain_var(t) ) ) {
      i++;
      continue;
    }
    // prev char is a $ would create variable names: don't dequote
    if( quotestart >= 1 && (*strstart)[quotestart-1] == '$' && (!doublequote || start_extends) ) {
      i++;
      continue;
    }

    // do dequote: highest position first so earlier indexes stay valid
    strend->erase(quoteend, 1);
    // needs one escape
    if(ce == 1)
      escapestr->insert(escapepos, "\\");
    strstart->erase(quotestart, 1);

    // Resume on the character that followed the closing quote. Its index
    // moved by the edits made to the closing string: +1 for a backslash
    // inserted there, -1 when the opening quote was erased from it too.
    i = quoteend;
    if(ce == 1 && escapestr == strend)
      i++;
    if(strstart == strend)
      i--;
  }
}
// Drops the backslash of `\$` sequences where the '$' cannot start an
// expansion anyway, e.g. `\$ ` becomes `$ `.
//
// Text between single quotes is skipped. The backslash is kept when the
// character after the '$' could begin a variable name, a special variable,
// or a `$(`/`${` construct.
//
// NOTE(review): quote state is tracked inside this one string only; a
// sub-argument that begins in the middle of a double-quoted section is not
// recognised as such — confirm against how the parser splits strings.
void do_minify_dollar(subarg_string_t* in)
{
  std::string& val = in->val;
  for(uint32_t i=0; i<val.size(); i++) {
    // skip singlequote strings; stop at the end of the string when the
    // quote is never closed (e.g. an apostrophe inside double quotes)
    if(val[i] == '\'') {
      i++;
      while(i<val.size() && val[i] != '\'')
        i++;
      continue;
    }
    if(val[i] != '\\')
      continue;
    // has \$
    if(i+1<val.size() && val[i+1] == '$') {
      // char after $ would start an expansion: the backslash is needed
      if(i+2<val.size()) {
        const char next = val[i+2];
        // '(' and '{' are tested explicitly, whether or not SPECIAL_VARS lists them
        if(is_varname(next) || is_in(next, SPECIAL_VARS) || next == '(' || next == '{')
          continue;
      }
      // after the erase val[i] is the '$'; the loop increment steps over it
      val.erase(i, 1);
    }
    else {
      // any other escaped character: step over it so that `\\$` is not
      // mistaken for `\$` and `\'` is not mistaken for an opening quote
      i++;
    }
  }
}
// Recursion callback applying the quote and dollar minifications.
//
// - arguments go through do_minify_quotes()
// - string sub-arguments go through do_minify_dollar()
// - redirects carrying a here document are handled by hand: the target and
//   the non-string parts of the document are recursed into, the string parts
//   are left untouched, and false is returned so the caller does not recurse
//   any further
// Returns true in every other case.
bool r_minify_useless_quotes(_obj* in)
{
  if(in->type == _obj::arg)
  {
    do_minify_quotes(dynamic_cast<arg_t*>(in));
  }
  else if(in->type == _obj::subarg_string)
  {
    do_minify_dollar(dynamic_cast<subarg_string_t*>(in));
  }
  else if(in->type == _obj::redirect)
  {
    redirect_t* redir = dynamic_cast<redirect_t*>(in);
    if(redir->here_document != nullptr)
    {
      recurse(r_minify_useless_quotes, redir->target);
      // here document text must keep its quotes: only visit what is not text
      for(auto part: redir->here_document->sa)
      {
        if(part->type != _obj::subarg_string)
          recurse(r_minify_useless_quotes, part);
      }
      // don't recurse on the rest
      return false;
    }
  }
  return true;
}
/** NAME MINIFYING **/
// Maps an index to one character of the naming alphabet:
//   0-25  -> 'a'-'z'
//   26-51 -> 'A'-'Z'
//   52    -> '_'
//   53-62 -> '0'-'9'
// Any other index yields 0.
char nchar(uint32_t n)
{
  if(n<26)
    return 'a'+n;
  else if(n<52)
    return 'A'+(n-26);
  else if(n==52)
    return '_';
  else if(n<63)
    return '0'+(n-53);
  else
    return 0;
}

// Builds the n-th short identifier.
//
// The first character is taken from the 53 letters/underscore so the name
// never starts with a digit; the remaining characters encode n/53 in base 63
// (least significant digit first, no padding) over the full alphabet.
// Distinct values of n always give distinct names, and no name contains a
// NUL character.
//
// Names for n below 53*63 are the same as those produced by the previous
// implementation, which emitted NUL characters beyond that point.
std::string minimal_name(uint32_t n)
{
  std::string ret;
  ret += nchar(n%53);
  for(uint32_t q=n/53; q>0; q/=63)
    ret += nchar(q%63);
  return ret;
}
// Assigns a short replacement name to every key of `vars`.
//
// vars:     names to shorten, with their counts; they are processed in the
//           order given by sort_by_value()
// excluded: names that must never be handed out, to avoid collisions
//
// Candidate names come from minimal_name() with an increasing counter;
// candidates found in `excluded` are skipped. Returns the old-name ->
// new-name mapping.
strmap_t gen_minimal_map(countmap_t const& vars, set_t const& excluded)
{
  strmap_t mapping;
  uint32_t next_id = 0;
  for(const auto& entry: sort_by_value(vars))
  {
    std::string candidate = minimal_name(next_id++);
    while(excluded.find(candidate) != excluded.end())
      candidate = minimal_name(next_id++);
    mapping.insert(std::make_pair(entry.first, candidate));
  }
  return mapping;
}
// calls
// Renames the variables of the tree rooted at `in` to minimal names.
//
// in:      root of the tree to process
// exclude: forwarded to varmap_get()
//
// Names listed in m_excluded_var and all_reserved_words are never used as
// replacement names. A variable rescan is requested once the rename is done.
void minify_var(_obj* in, std::regex const& exclude)
{
  // get vars
  varmap_get(in, exclude);

  // names that must not be generated: excluded and reserved
  set_t forbidden;
  concat_sets(forbidden, m_excluded_var);
  concat_sets(forbidden, all_reserved_words);

  // create mapping, then apply it
  strmap_t renames = gen_minimal_map(m_vars, forbidden);
  recurse(r_replace_var, in, &renames);
  require_rescan_var();
}
// Renames the functions of the tree rooted at `in` to minimal names.
//
// in:      root of the tree to process
// exclude: forwarded to fctcmdmap_get()
//
// Replacement names never collide with: command names that are not
// functions, m_excluded_fct, names collected by r_get_unsets, and
// all_reserved_words. Function and command rescans are requested afterwards.
void minify_fct(_obj* in, std::regex const& exclude)
{
  // get fcts and cmds
  fctcmdmap_get(in, exclude, regex_null);
  set_t unset_names;
  recurse(r_get_unsets, in, &unset_names);

  // forbidden names: cmds minus fcts, plus excluded, unsets and reserved
  set_t forbidden = map_to_set(m_cmds);
  exclude_sets(forbidden, map_to_set(m_fcts));
  concat_sets(forbidden, m_excluded_fct);
  concat_sets(forbidden, unset_names);
  concat_sets(forbidden, all_reserved_words);

  // create mapping
  m_fcts = combine_common(m_fcts, m_cmds);
  strmap_t renames = gen_minimal_map(m_fcts, forbidden);

  // perform replace
  recurse(r_replace_fct, in, &renames);
  require_rescan_fct();
  require_rescan_cmd();
}
// Deletes the functions that are never called as a command.
//
// A function is unused when its name is a key of m_fcts but not of m_cmds.
// Returns true when at least one function was found unused (a full rescan is
// then requested), false when nothing had to be done.
bool delete_unused_fct(_obj* in, std::regex const& exclude)
{
  // get fcts and cmds
  fctcmdmap_get(in, exclude, regex_null);

  // find unused fcts
  set_t dead;
  for(const auto& fct: m_fcts)
  {
    if(m_cmds.find(fct.first) == m_cmds.end())
      dead.insert(fct.first);
  }
  if(dead.size() == 0)
    return false;

  // perform deletion
  recurse(r_delete_fct, in, &dead);
  require_rescan_all();
  return true;
}
// Deletes the variables that are defined but never read.
//
// A variable is unused when its (non-empty) name is a key of m_vardefs but
// not of m_varcalls. Returns true when at least one variable was found unused
// (a full rescan is then requested), false when nothing had to be done.
bool delete_unused_var(_obj* in, std::regex const& exclude)
{
  // get vars
  varmap_get(in, exclude);

  // find unused vars
  set_t dead;
  for(const auto& def: m_vardefs)
  {
    if(def.first == "")
      continue;
    if(m_varcalls.find(def.first) == m_varcalls.end())
      dead.insert(def.first);
  }
  if(dead.size() == 0)
    return false;

  // perform deletion
  recurse(r_delete_var, in, &dead);
  require_rescan_all();
  return true;
}
// One combined pass of unused variable and unused function deletion.
//
// Uses the same criteria as delete_unused_var() and delete_unused_fct() but
// gathers both sets from a single allmaps_get() call and removes them with a
// single recursion. Returns true when something was deleted.
bool delete_unused_both(_obj* in, std::regex const& var_exclude, std::regex const& fct_exclude)
{
  // get all
  allmaps_get(in, var_exclude, fct_exclude, regex_null);

  // variables defined but never called
  set_t dead_vars;
  for(const auto& def: m_vardefs)
  {
    if(def.first == "")
      continue;
    if(m_varcalls.find(def.first) == m_varcalls.end())
      dead_vars.insert(def.first);
  }
  // functions never used as a command
  set_t dead_fcts;
  for(const auto& fct: m_fcts)
  {
    if(m_cmds.find(fct.first) == m_cmds.end())
      dead_fcts.insert(fct.first);
  }

  if(dead_vars.size() == 0 && dead_fcts.size() == 0)
    return false;

  recurse(r_delete_varfct, in, &dead_vars, &dead_fcts);
  require_rescan_all();
  return true;
}
// Repeats delete_unused_both() until a pass deletes nothing: removing one
// definition can leave another one unused.
void delete_unused(_obj* in, std::regex const& var_exclude, std::regex const& fct_exclude)
{
  bool removed_something;
  do {
    removed_something = delete_unused_both(in, var_exclude, fct_exclude);
  } while(removed_something);
}
// minify ${var} to $var
//
// Recursion callback. For every variable sub-argument of an argument that is
// flagged as a manipulation but carries no manipulation operation, the flag
// is cleared. The flag is kept when:
//  - the variable has an index (bash array)
//  - the name has several characters and starts with a digit: `$10` would be
//    read as `$1` followed by `0`
//  - the next sub-argument is a string starting with a word character, which
//    would otherwise extend the variable name
// Always returns true.
bool r_minify_empty_manip(_obj* in)
{
  if(in->type != _obj::arg)
    return true;

  arg_t* t = dynamic_cast<arg_t*>(in);
  for(uint32_t i=0; i<t->sa.size(); i++)
  {
    // has to be a variable
    if(t->sa[i]->type != _obj::subarg_variable)
      continue;
    subarg_variable_t* vs = dynamic_cast<subarg_variable_t*>(t->sa[i]);
    // nothing to do without a variable, or when it is not a manip
    if(vs->var == nullptr || !vs->var->is_manip)
      continue;
    // is a var bash array: skip this one only, later sub-arguments of the
    // same argument must still be examined
    if(vs->var->index != nullptr)
      continue;
    // multi-digit positional parameter: braces are required
    if(vs->var->varname.size() > 1 && is_num(vs->var->varname[0]))
      continue;
    if(i+1<t->sa.size() && t->sa[i+1]->type == _obj::subarg_string)
    {
      // if next subarg is a string: check its first char
      subarg_string_t* next = dynamic_cast<subarg_string_t*>(t->sa[i+1]);
      char c = next->val[0];
      // if its first would extend the var name: skip
      if(is_alphanum(c) || c == '_')
        continue;
    }
    // if has no actual manipulation operation: set it to not manip
    if(vs->var->manip == nullptr || vs->var->manip->sa.size() == 0)
      vs->var->is_manip = false;
  }
  return true;
}
// Checks whether `in` is a brace block or subshell wrapping exactly one
// command/block, and if so returns that inner block.
//
// Returns nullptr when:
//  - `in` is neither a brace block nor a subshell, or has no list
//  - the list does not hold exactly one condlist, with one pipeline, with
//    one command
//  - `in` is a subshell and has_env_set() holds for the inner block
// Nothing is modified; the caller performs the actual replacement.
block_t* do_one_minify_single_block(block_t* in)
{
  list_t* body = nullptr;
  switch(in->type)
  {
    case _obj::block_brace:
      body = dynamic_cast<brace_t*>(in)->lst;
      break;
    case _obj::block_subshell:
      body = dynamic_cast<subshell_t*>(in)->lst;
      break;
    default: break;
  }
  if(body == nullptr)
    return nullptr;

  // not a single cmd/block: not applicable
  if(body->cls.size() != 1)
    return nullptr;
  auto only_list = body->cls[0];
  if(only_list->pls.size() != 1)
    return nullptr;
  auto only_pipeline = only_list->pls[0];
  if(only_pipeline->cmds.size() != 1)
    return nullptr;

  block_t* inner = only_pipeline->cmds[0];
  // if is a subshell and has some env set: don't remove it
  if(in->type == _obj::block_subshell && has_env_set(inner))
    return nullptr;
  return inner;
}
// Recursion callback: inside a pipeline, replaces every brace block or
// subshell that only wraps a single block by that block.
//
// The wrapper's redirects are moved in front of the inner block's own
// redirects. The pass over the pipeline is repeated until nothing changes,
// since a replacement can expose another wrapper.
// Always returns true.
bool r_minify_single_block(_obj* in)
{
  if(in->type != _obj::pipeline)
    return true;

  pipeline_t* pl = dynamic_cast<pipeline_t*>(in);
  bool changed;
  do
  {
    changed = false;
    for(uint32_t i=0; i<pl->cmds.size(); i++)
    {
      block_t* wrapper = pl->cmds[i];
      block_t* inner = do_one_minify_single_block(wrapper);
      if(inner == nullptr)
        continue;

      // concatenate redirects: wrapper's first, same relative order
      inner->redirs.insert(inner->redirs.begin(), wrapper->redirs.begin(), wrapper->redirs.end());
      // deindex: the wrapper must not keep what now belongs to the inner
      // block, nor the inner block itself, before it gets deleted
      wrapper->redirs.resize(0);
      if(wrapper->type == _obj::block_brace)
        dynamic_cast<brace_t*>(wrapper)->lst->cls[0]->pls[0]->cmds[0] = nullptr;
      else if(wrapper->type == _obj::block_subshell)
        dynamic_cast<subshell_t*>(wrapper)->lst->cls[0]->pls[0]->cmds[0] = nullptr;

      // replace value
      delete wrapper;
      pl->cmds[i] = inner;
      changed = true;
    }
  }
  while(changed);
  return true;
}
// Recursion callback: sets *r to true when a subshell sub-argument with the
// backtick flag is met.
//
// Returns false (stop descending) once *r is true, true otherwise.
bool r_has_backtick(_obj* in, bool* r)
{
  // already found: nothing left to look for
  if(*r)
    return false;
  if(in->type == _obj::subarg_subshell)
  {
    subarg_subshell_t* sub = dynamic_cast<subarg_subshell_t*>(in);
    if(sub->backtick)
    {
      *r = true;
      return false;
    }
  }
  return true;
}
// Recursion callback: turns on the backtick flag of subshell sub-arguments.
//
// A subshell sub-argument that is not yet flagged gets the flag unless
// r_has_backtick() finds a flagged one somewhere inside it.
// Returns false for every subshell sub-argument, so their content is not
// visited by this pass; returns true for anything else.
bool r_minify_backtick(_obj* in)
{
  if(in->type != _obj::subarg_subshell)
    return true;

  subarg_subshell_t* sub = dynamic_cast<subarg_subshell_t*>(in);
  if(!sub->backtick)
  {
    bool nested_backtick = false;
    recurse(r_has_backtick, sub->sbsh, &nested_backtick);
    if(!nested_backtick)
      sub->backtick = true;
  }
  return false;
}
// Optimisation: bundles the processors that never cancel recursion so the
// tree is walked once for all of them instead of once per processor.
// Their return values are ignored and true is always returned.
bool r_minify(_obj* in)
{
  (void) r_minify_empty_manip(in);
  (void) r_minify_single_block(in);
  (void) r_do_string_processor(in);
  return true;
}
// Runs the generic minification passes over the tree rooted at `in`,
// as three successive recursions:
//  1. r_minify (empty manipulations, single blocks, string processor)
//  2. r_minify_backtick
//  3. r_minify_useless_quotes
void minify_generic(_obj* in)
{
recurse(r_minify, in);
recurse(r_minify_backtick, in);
recurse(r_minify_useless_quotes, in);
}