implement redirect parsing and minimizing

This commit is contained in:
zawz 2020-11-19 16:51:26 +01:00
parent b0060b1fdf
commit 1a5bbd7986
6 changed files with 255 additions and 44 deletions

View file

@ -9,8 +9,8 @@
#define SPACES " \t" #define SPACES " \t"
#define SEPARATORS " \t\n" #define SEPARATORS " \t\n"
#define ARG_END " \t\n;#()&|" #define ARG_END " \t\n;#()&|<>"
#define VARNAME_END " \t\n;#()&|=\"'\\{}" #define VARNAME_END " \t\n;#()&|=\"'\\{}/-+"
#define BLOCK_TOKEN_END " \t\n;#()&|=\"'\\" #define BLOCK_TOKEN_END " \t\n;#()&|=\"'\\"
#define COMMAND_SEPARATOR "\n;" #define COMMAND_SEPARATOR "\n;"
#define CONTROL_END "#)" #define CONTROL_END "#)"
@ -23,6 +23,10 @@
std::string import_file(std::string const& path); std::string import_file(std::string const& path);
// Character classification helpers (plain ASCII range comparisons).
// true when c is a decimal digit '0'..'9'
bool is_num(char c);
// true when c is a letter 'a'..'z' or 'A'..'Z'
bool is_alpha(char c);
// true when c is a letter or a decimal digit
bool is_alphanum(char c);
shmain* parse_text(const char* in, uint32_t size, std::string const& filename=""); shmain* parse_text(const char* in, uint32_t size, std::string const& filename="");
inline shmain* parse_text(std::string const& in, std::string const& filename="") { return parse_text(in.c_str(), in.size(), filename); } inline shmain* parse_text(std::string const& in, std::string const& filename="") { return parse_text(in.c_str(), in.size(), filename); }
inline shmain* parse(std::string const& file) { return parse_text(import_file(file), file); } inline shmain* parse(std::string const& file) { return parse_text(import_file(file), file); }

View file

@ -19,6 +19,12 @@ void recurse(bool (&fct)(_obj*, Args...), _obj* o, Args... args)
// recursive calls // recursive calls
switch(o->type) switch(o->type)
{ {
case _obj::_redirect :
{
redirect* t = dynamic_cast<redirect*>(o);
recurse(fct, t->target, args...);
break;
}
case _obj::_arg : case _obj::_arg :
{ {
arg* t = dynamic_cast<arg*>(o); arg* t = dynamic_cast<arg*>(o);

View file

@ -76,11 +76,13 @@ extern std::string g_origin;
cmd* make_cmd(std::vector<std::string> args); cmd* make_cmd(std::vector<std::string> args);
// meta object type
class _obj class _obj
{ {
public: public:
enum _objtype { enum _objtype {
subarg_string, subarg_variable, subarg_subshell, subarg_arithmetic, subarg_manipulation, subarg_string, subarg_variable, subarg_subshell, subarg_arithmetic, subarg_manipulation,
_redirect,
_arg, _arg,
_arglist, _arglist,
_pipeline, _pipeline,
@ -101,7 +103,6 @@ public:
virtual std::string generate(int ind)=0; virtual std::string generate(int ind)=0;
}; };
class arg : public _obj class arg : public _obj
{ {
public: public:
@ -142,19 +143,31 @@ public:
std::string generate(int ind); std::string generate(int ind);
}; };
// One redirection: the operator text (`op`, as read by the parser) and an
// optional target argument.
// The redirect deletes `target` when it is destroyed.
class redirect : public _obj
{
public:
  // Tag the object as _obj::_redirect and store `in` (may be nullptr) as target.
  redirect(arg* in=nullptr) : target(in) { type=_obj::_redirect; }
  // Deleting a null pointer is a no-op, so no explicit null check is required.
  ~redirect() { delete target; }

  // Shell text of this redirection.
  std::string generate(int ind);

  std::string op;
  arg* target;
};
// Meta block // Meta block
class block : public _obj class block : public _obj
{ {
public: public:
block() { redirs=nullptr; } block() { ; }
virtual ~block() { if(redirs!=nullptr) delete redirs; } virtual ~block() { for(auto it: redirs) delete it; }
// cmd // cmd
arglist* redirs; std::vector<redirect*> redirs;
// subshell: return the containing cmd, if it is a single command // subshell: return the containing cmd, if it is a single command
cmd* single_cmd(); cmd* single_cmd();
std::string generate_redirs(int ind); std::string generate_redirs(int ind, std::string const& _str);
virtual std::string generate(int ind)=0; virtual std::string generate(int ind)=0;
}; };

View file

@ -109,20 +109,33 @@ std::string list::generate(int ind, bool first_indent)
return ret; return ret;
} }
// Render the redirection as shell text: the operator, then the target if there
// is one. Operator and target are separated by one space unless opt_minimize
// is set. `ind` is not used; the target is always generated with indent 0.
std::string redirect::generate(int ind)
{
  // no target: the operator alone is the whole redirection
  if(target == nullptr)
    return op;

  std::string out(op);
  if(!opt_minimize)
    out.push_back(' ');
  out.append(target->generate(0));
  return out;
}
// BLOCK // BLOCK
std::string block::generate_redirs(int ind) std::string block::generate_redirs(int ind, std::string const& _str)
{ {
std::string ret; std::string ret=" ";
if(redirs != nullptr) bool previous_isnt_num = _str.size()>0 && !is_num(_str[_str.size()-1]);
for(auto it: redirs)
{ {
std::string t = redirs->generate(ind); std::string _r = it->generate(0);
if(t!="") if(opt_minimize && _r.size() > 0 && !is_num(_r[0]) && previous_isnt_num)
{ ret.pop_back(); // remove one space if possible
if(!opt_minimize) ret += ' '; ret += _r + ' ';
ret += t; previous_isnt_num = ret.size()>1 && !is_num(ret[ret.size()-2]);
}
} }
ret.pop_back(); // remove last space
return ret; return ret;
} }
@ -155,6 +168,8 @@ std::string if_block::generate(int ind)
} }
ret += indented("fi", ind); ret += indented("fi", ind);
ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -170,6 +185,9 @@ std::string for_block::generate(int ind)
ret += ops->generate(ind+1); ret += ops->generate(ind+1);
ret += indented("done", ind); ret += indented("done", ind);
if(opt_minimize && ret.size()>1 && !is_alpha(ret[ret.size()-2]))
ret.pop_back();
ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -187,6 +205,9 @@ std::string while_block::generate(int ind)
ret += ops->generate(ind+1); ret += ops->generate(ind+1);
ret += indented("done", ind); ret += indented("done", ind);
if(opt_minimize && ret.size()>1 && !is_alpha(ret[ret.size()-2]))
ret.pop_back();
ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -203,8 +224,7 @@ std::string subshell::generate(int ind)
// close subshell // close subshell
ret += indented(")", ind); ret += indented(")", ind);
ret += generate_redirs(ind); ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -220,6 +240,8 @@ std::string shmain::generate(bool print_shebang, int ind)
ret += lst->generate(ind); ret += lst->generate(ind);
if( opt_minimize && ret[ret.size()-1] == '\n') if( opt_minimize && ret[ret.size()-1] == '\n')
ret.pop_back(); ret.pop_back();
ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -231,8 +253,7 @@ std::string brace::generate(int ind)
ret += lst->generate(ind+1); ret += lst->generate(ind+1);
ret += indented("}", ind); ret += indented("}", ind);
ret += generate_redirs(ind); ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -247,8 +268,7 @@ std::string function::generate(int ind)
ret += lst->generate(ind+1); ret += lst->generate(ind+1);
ret += indented("}", ind); ret += indented("}", ind);
ret += generate_redirs(ind); ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -285,8 +305,7 @@ std::string case_block::generate(int ind)
ind--; ind--;
ret += indented("esac", ind); ret += indented("esac", ind);
ret += generate_redirs(ind); ret += generate_redirs(ind, ret);
return ret; return ret;
} }
@ -309,6 +328,7 @@ std::string cmd::generate(int ind)
if(ret[ret.size()-1] == ' ') if(ret[ret.size()-1] == ' ')
ret.pop_back(); ret.pop_back();
ret += generate_redirs(ind, ret);
return ret; return ret;
} }

View file

@ -4,15 +4,23 @@
#include <strings.h> #include <strings.h>
#include <string.h> #include <string.h>
#include <ztd/shell.hpp>
#include "util.hpp" #include "util.hpp"
#define ORIGIN_NONE "" #define ORIGIN_NONE ""
// macro
#define PARSE_ERROR(str, i) ztd::format_error(str, "", in, i) #define PARSE_ERROR(str, i) ztd::format_error(str, "", in, i)
// constants
const std::vector<std::string> all_reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "do", "done", "{", "}" }; const std::vector<std::string> all_reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "do", "done", "{", "}" };
const std::vector<std::string> out_reserved_words = { "then", "else", "fi", "esac", "do", "done", "}" }; const std::vector<std::string> out_reserved_words = { "then", "else", "fi", "esac", "do", "done", "}" };
// stuff
std::string g_expecting; std::string g_expecting;
std::string expecting(std::string const& word) std::string expecting(std::string const& word)
@ -36,13 +44,17 @@ bool has_common_char(const char* str1, const char* str2)
return false; return false;
} }
inline bool is_alphanum(char c) inline bool is_num(char c)
{ {
return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z') || (c >= '0' && c<='9'); return (c >= '0' && c <= '9');
} }
inline bool is_alpha(char c) inline bool is_alpha(char c)
{ {
return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z'); return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
inline bool is_alphanum(char c)
{
return is_alpha(c) || is_num(c);
} }
bool valid_name(std::string const& str) bool valid_name(std::string const& str)
@ -217,7 +229,7 @@ std::pair<arg*, uint32_t> parse_arg(const char* in, uint32_t size, uint32_t star
if(unexpected != NULL && is_in(in[i], unexpected)) if(unexpected != NULL && is_in(in[i], unexpected))
throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i);
while(i<size && !is_in(in[i], end)) while(i<size && !(end != NULL && is_in(in[i], end)) )
{ {
if(i+1<size && is_in(in[i], "<>") && in[i+1]=='&') // special case for <& and >& if(i+1<size && is_in(in[i], "<>") && in[i+1]=='&') // special case for <& and >&
{ {
@ -375,14 +387,135 @@ std::pair<arg*, uint32_t> parse_arg(const char* in, uint32_t size, uint32_t star
return std::make_pair(ret, i); return std::make_pair(ret, i);
} }
std::pair<redirect*, uint32_t> parse_redirect(const char* in, uint32_t size, uint32_t start)
{
uint32_t i=start;
bool is_redirect=false;
bool needs_arg=false;
bool has_num_prefix=false;
if(in[i] > '0' && in[i] < '9')
{
i++;
has_num_prefix=true;
}
if( in[i] == '>' )
{
i++;
if(i>size)
PARSE_ERROR("Unexpected end of file", i);
is_redirect = true;
if(i+1<size && in[i] == '&' && is_num(in[i+1]) )
{
i+=2;
needs_arg=false;
}
else
{
if(in[i] == '>')
i++;
needs_arg=true;
}
}
else if( in[i] == '<' )
{
if(has_num_prefix)
PARSE_ERROR("Invalid input redirection", i-1);
i++;
if(i>size)
PARSE_ERROR("Unexpected end of file", i);
if(in[i] == '<')
i++;
is_redirect=true;
needs_arg=true;
}
if(is_redirect)
{
redirect* ret=nullptr;
try
{
ret = new redirect;
ret->op = std::string(in+start, i-start);
if(needs_arg)
{
i = skip_chars(in, size, i, SPACES);
if(ret->op == "<<")
{
auto pa = parse_arg(in, size, i);
std::string delimitor = pa.first->string();
delete pa.first;
pa.first = nullptr;
if(delimitor == "")
PARSE_ERROR("Non-static or empty text input delimitor", i);
if(delimitor.find('"') != std::string::npos || delimitor.find('\'') != std::string::npos || delimitor.find('\\') != std::string::npos)
{
delimitor = ztd::sh("echo "+delimitor); // shell resolve the delimitor
delimitor.pop_back(); // remove \n
}
i = skip_chars(in, size, pa.second, SPACES); // skip spaces
if(in[i] == '#') // skip comment
i = skip_until(in, size, i, "\n"); //skip to endline
if(in[i] != '\n') // has another arg
throw PARSE_ERROR("Additionnal argument after text input delimitor", i);
i++;
uint32_t j=i;
char* tc=NULL;
tc = (char*) strstr(in+i, std::string("\n"+delimitor+"\n").c_str()); // find delimitor
if(tc!=NULL) // delimitor was found
{
i = (tc-in)+delimitor.size()+1;
}
else
{
i = size;
// maybe at end of file with no \n
// if(strstr(in+size-delimitor.size(), std::string("\n"+delimitor).c_str())!=NULL)
// i = size-delimitor.size();
// else // not found: end of file
}
std::string tmpparse=std::string(in+j, i-j);
auto pval = parse_arg(tmpparse.c_str(), tmpparse.size(), 0, NULL);
ret->target = pval.first;
ret->target->sa.insert(ret->target->sa.begin(), new string_subarg(delimitor+"\n"));
}
else
{
auto pa = parse_arg(in, size, i);
ret->target = pa.first;
i=pa.second;
}
}
}
catch(ztd::format_error& e)
{
if(ret!=nullptr)
delete ret;
throw e;
}
return std::make_pair(ret, i);
}
else
return std::make_pair(nullptr, start);
}
// parse one list of arguments (a command for instance) // parse one list of arguments (a command for instance)
// must start at a read char // must start at a read char
// first char has to be read // first char has to be read
// ends at either &|;\n#() // ends at either &|;\n#()
std::pair<arglist*, uint32_t> parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false) std::pair<arglist*, uint32_t> parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false, std::vector<redirect*>* redirs=nullptr)
{ {
uint32_t i=start; uint32_t i=start;
arglist* ret = new arglist; arglist* ret = nullptr;
try try
{ {
@ -395,9 +528,27 @@ std::pair<arglist*, uint32_t> parse_arglist(const char* in, uint32_t size, uint3
} }
while(i<size) while(i<size)
{ {
auto pp=parse_arg(in, size, i); if(redirs!=nullptr)
ret->args.push_back(pp.first); {
i = skip_chars(in, size, pp.second, SPACES); auto pr = parse_redirect(in, size, i);
if(pr.first != nullptr)
{
redirs->push_back(pr.first);
i=pr.second;
}
else
goto argparse;
}
else
{
argparse:
if(ret == nullptr)
ret = new arglist;
auto pp=parse_arg(in, size, i);
ret->args.push_back(pp.first);
i = pp.second;
}
i = skip_chars(in, size, i, SPACES);
if(i>=size) if(i>=size)
return std::make_pair(ret, i); return std::make_pair(ret, i);
if( is_in(in[i], SPECIAL_TOKENS) ) if( is_in(in[i], SPECIAL_TOKENS) )
@ -406,7 +557,8 @@ std::pair<arglist*, uint32_t> parse_arglist(const char* in, uint32_t size, uint3
} }
catch(ztd::format_error& e) catch(ztd::format_error& e)
{ {
delete ret; if(ret != nullptr)
delete ret;
throw e; throw e;
} }
return std::make_pair(ret, i); return std::make_pair(ret, i);
@ -730,7 +882,7 @@ std::pair<function*, uint32_t> parse_function(const char* in, uint32_t size, uin
auto pp=parse_list_until(in, size, i, '}'); auto pp=parse_list_until(in, size, i, '}');
if(pp.first->size()<=0) if(pp.first->size()<=0)
throw PARSE_ERROR("Condition is empty", i); throw PARSE_ERROR("Function is empty", i);
ret->lst=pp.first; ret->lst=pp.first;
i=pp.second; i=pp.second;
@ -769,7 +921,7 @@ std::pair<cmd*, uint32_t> parse_cmd(const char* in, uint32_t size, uint32_t star
if(!is_in(in[i], SPECIAL_TOKENS)) if(!is_in(in[i], SPECIAL_TOKENS))
{ {
auto pp=parse_arglist(in, size, i, true); auto pp=parse_arglist(in, size, i, true, &ret->redirs);
ret->args = pp.first; ret->args = pp.first;
i = pp.second; i = pp.second;
} }
@ -1104,16 +1256,31 @@ std::pair<block*, uint32_t> parse_block(const char* in, uint32_t size, uint32_t
if(ret->type != block::block_cmd) if(ret->type != block::block_cmd)
{ {
auto pp=parse_arglist(in, size, i, false); // in case of redirects // while(true)
if(pp.first->args.size()>0) // {
{ // auto pr=parse_redirect(in, size, i);
i = pp.second; // if(pr.first == nullptr)
ret->redirs=pp.first; // break;
} // ret->redirs.push_back(pr.first);
else // i = pr.second;
// }
uint32_t j=skip_chars(in, size, i, SPACES);
auto pp=parse_arglist(in, size, j, false, &ret->redirs); // in case of redirects
if(pp.first != nullptr)
{ {
delete pp.first; delete pp.first;
throw PARSE_ERROR("Extra argument after block", i);
} }
i=pp.second;
// if(pp.first->args.size()>0)
// {
// i = pp.second;
// ret->redirs=pp.first;
// }
// else
// {
// delete pp.first;
// }
} }
} }
catch(ztd::format_error& e) catch(ztd::format_error& e)

View file

@ -380,7 +380,8 @@ bool resolve_recurse(_obj* o, shmain* parent)
resolve_arg(it.second, parent, true); // force quoted resolve_arg(it.second, parent, true); // force quoted
resolve(it.second, parent); resolve(it.second, parent);
} }
resolve(t->redirs, parent); for(auto it: t->redirs)
resolve(it, parent);
resolve(t->args, parent); resolve(t->args, parent);
return false; return false;
}; break; }; break;