From 1a5bbd79861f32c5f84da3aa6f23db7f5246cb30 Mon Sep 17 00:00:00 2001 From: zawz Date: Thu, 19 Nov 2020 16:51:26 +0100 Subject: [PATCH] implement redirect parsing and minimizing --- include/parse.hpp | 8 +- include/recursive.hpp | 6 ++ include/struc.hpp | 23 +++-- src/generate.cpp | 54 +++++++---- src/parse.cpp | 205 ++++++++++++++++++++++++++++++++++++++---- src/resolve.cpp | 3 +- 6 files changed, 255 insertions(+), 44 deletions(-) diff --git a/include/parse.hpp b/include/parse.hpp index 1ceee16..6577c2e 100644 --- a/include/parse.hpp +++ b/include/parse.hpp @@ -9,8 +9,8 @@ #define SPACES " \t" #define SEPARATORS " \t\n" -#define ARG_END " \t\n;#()&|" -#define VARNAME_END " \t\n;#()&|=\"'\\{}" +#define ARG_END " \t\n;#()&|<>" +#define VARNAME_END " \t\n;#()&|=\"'\\{}/-+" #define BLOCK_TOKEN_END " \t\n;#()&|=\"'\\" #define COMMAND_SEPARATOR "\n;" #define CONTROL_END "#)" @@ -23,6 +23,10 @@ std::string import_file(std::string const& path); +bool is_num(char c); +bool is_alpha(char c); +bool is_alphanum(char c); + shmain* parse_text(const char* in, uint32_t size, std::string const& filename=""); inline shmain* parse_text(std::string const& in, std::string const& filename="") { return parse_text(in.c_str(), in.size(), filename); } inline shmain* parse(std::string const& file) { return parse_text(import_file(file), file); } diff --git a/include/recursive.hpp b/include/recursive.hpp index 1c04f43..69294d9 100644 --- a/include/recursive.hpp +++ b/include/recursive.hpp @@ -19,6 +19,12 @@ void recurse(bool (&fct)(_obj*, Args...), _obj* o, Args... args) // recursive calls switch(o->type) { + case _obj::_redirect : + { + redirect* t = dynamic_cast(o); + recurse(fct, t->target, args...); + break; + } case _obj::_arg : { arg* t = dynamic_cast(o); diff --git a/include/struc.hpp b/include/struc.hpp index 0d9ed68..0acda2c 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -76,11 +76,13 @@ extern std::string g_origin; cmd* make_cmd(std::vector args); +// meta object type class _obj { public: enum _objtype { subarg_string, subarg_variable, subarg_subshell, subarg_arithmetic, subarg_manipulation, + _redirect, _arg, _arglist, _pipeline, @@ -101,7 +103,6 @@ public: virtual std::string generate(int ind)=0; }; - class arg : public _obj { public: @@ -142,19 +143,31 @@ public: std::string generate(int ind); }; +class redirect : public _obj +{ +public: + redirect(arg* in=nullptr) { type=_obj::_redirect; target=in; } + ~redirect() { if(target != nullptr) delete target; } + + std::string generate(int ind); + + std::string op; + arg* target; +}; + // Meta block class block : public _obj { public: - block() { redirs=nullptr; } - virtual ~block() { if(redirs!=nullptr) delete redirs; } + block() { ; } + virtual ~block() { for(auto it: redirs) delete it; } // cmd - arglist* redirs; + std::vector redirs; // subshell: return the containing cmd, if it is a single command cmd* single_cmd(); - std::string generate_redirs(int ind); + std::string generate_redirs(int ind, std::string const& _str); virtual std::string generate(int ind)=0; }; diff --git a/src/generate.cpp b/src/generate.cpp index 8e569a4..ee135ee 100644 --- a/src/generate.cpp +++ b/src/generate.cpp @@ -109,20 +109,33 @@ std::string list::generate(int ind, bool first_indent) return ret; } +std::string redirect::generate(int ind) +{ + std::string ret=op; + if(target!=nullptr) + { + if(!opt_minimize) + ret += ' '; + ret += target->generate(0); + } + return ret; +} + // BLOCK -std::string block::generate_redirs(int ind) +std::string block::generate_redirs(int ind, std::string const& _str) { - std::string ret; - if(redirs != nullptr) + std::string ret=" "; + bool previous_isnt_num = _str.size()>0 && !is_num(_str[_str.size()-1]); + for(auto it: redirs) { - std::string t = redirs->generate(ind); - if(t!="") - { - if(!opt_minimize) ret += ' '; - ret += t; - } + std::string _r = it->generate(0); + if(opt_minimize && _r.size() > 0 && !is_num(_r[0]) && previous_isnt_num) + ret.pop_back(); // remove one space if possible + ret += _r + ' '; + previous_isnt_num = ret.size()>1 && !is_num(ret[ret.size()-2]); } + ret.pop_back(); // remove last space return ret; } @@ -155,6 +168,8 @@ std::string if_block::generate(int ind) } ret += indented("fi", ind); + + ret += generate_redirs(ind, ret); return ret; } @@ -170,6 +185,9 @@ std::string for_block::generate(int ind) ret += ops->generate(ind+1); ret += indented("done", ind); + if(opt_minimize && ret.size()>1 && !is_alpha(ret[ret.size()-2])) + ret.pop_back(); + ret += generate_redirs(ind, ret); return ret; } @@ -187,6 +205,9 @@ std::string while_block::generate(int ind) ret += ops->generate(ind+1); ret += indented("done", ind); + if(opt_minimize && ret.size()>1 && !is_alpha(ret[ret.size()-2])) + ret.pop_back(); + ret += generate_redirs(ind, ret); return ret; } @@ -203,8 +224,7 @@ std::string subshell::generate(int ind) // close subshell ret += indented(")", ind); - ret += generate_redirs(ind); - + ret += generate_redirs(ind, ret); return ret; } @@ -220,6 +240,8 @@ std::string shmain::generate(bool print_shebang, int ind) ret += lst->generate(ind); if( opt_minimize && ret[ret.size()-1] == '\n') ret.pop_back(); + + ret += generate_redirs(ind, ret); return ret; } @@ -231,8 +253,7 @@ std::string brace::generate(int ind) ret += lst->generate(ind+1); ret += indented("}", ind); - ret += generate_redirs(ind); - + ret += generate_redirs(ind, ret); return ret; } @@ -247,8 +268,7 @@ std::string function::generate(int ind) ret += lst->generate(ind+1); ret += indented("}", ind); - ret += generate_redirs(ind); - + ret += generate_redirs(ind, ret); return ret; } @@ -285,8 +305,7 @@ std::string case_block::generate(int ind) ind--; ret += indented("esac", ind); - ret += generate_redirs(ind); - + ret += generate_redirs(ind, ret); return ret; } @@ -309,6 +328,7 @@ std::string cmd::generate(int ind) if(ret[ret.size()-1] == ' ') ret.pop_back(); + ret += generate_redirs(ind, ret); return ret; } diff --git a/src/parse.cpp b/src/parse.cpp index 1f30894..f003dc0 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -4,15 +4,23 @@ #include #include +#include + #include "util.hpp" #define ORIGIN_NONE "" +// macro + #define PARSE_ERROR(str, i) ztd::format_error(str, "", in, i) +// constants + const std::vector all_reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "do", "done", "{", "}" }; const std::vector out_reserved_words = { "then", "else", "fi", "esac", "do", "done", "}" }; +// stuff + std::string g_expecting; std::string expecting(std::string const& word) @@ -36,13 +44,17 @@ bool has_common_char(const char* str1, const char* str2) return false; } -inline bool is_alphanum(char c) +inline bool is_num(char c) { - return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z') || (c >= '0' && c<='9'); + return (c >= '0' && c <= '9'); } inline bool is_alpha(char c) { - return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z'); + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} +inline bool is_alphanum(char c) +{ + return is_alpha(c) || is_num(c); } bool valid_name(std::string const& str) @@ -217,7 +229,7 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star if(unexpected != NULL && is_in(in[i], unexpected)) throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); - while(i") && in[i+1]=='&') // special case for <& and >& { @@ -375,14 +387,135 @@ std::pair parse_arg(const char* in, uint32_t size, uint32_t star return std::make_pair(ret, i); } +std::pair parse_redirect(const char* in, uint32_t size, uint32_t start) +{ + uint32_t i=start; + + bool is_redirect=false; + bool needs_arg=false; + bool has_num_prefix=false; + + if(in[i] > '0' && in[i] < '9') + { + i++; + has_num_prefix=true; + } + + if( in[i] == '>' ) + { + i++; + if(i>size) + PARSE_ERROR("Unexpected end of file", i); + is_redirect = true; + if(i+1') + i++; + needs_arg=true; + } + + } + else if( in[i] == '<' ) + { + if(has_num_prefix) + PARSE_ERROR("Invalid input redirection", i-1); + i++; + if(i>size) + PARSE_ERROR("Unexpected end of file", i); + if(in[i] == '<') + i++; + is_redirect=true; + needs_arg=true; + } + + + if(is_redirect) + { + redirect* ret=nullptr; + try + { + ret = new redirect; + ret->op = std::string(in+start, i-start); + if(needs_arg) + { + i = skip_chars(in, size, i, SPACES); + if(ret->op == "<<") + { + auto pa = parse_arg(in, size, i); + std::string delimitor = pa.first->string(); + delete pa.first; + pa.first = nullptr; + + if(delimitor == "") + PARSE_ERROR("Non-static or empty text input delimitor", i); + + if(delimitor.find('"') != std::string::npos || delimitor.find('\'') != std::string::npos || delimitor.find('\\') != std::string::npos) + { + delimitor = ztd::sh("echo "+delimitor); // shell resolve the delimitor + delimitor.pop_back(); // remove \n + } + + i = skip_chars(in, size, pa.second, SPACES); // skip spaces + + if(in[i] == '#') // skip comment + i = skip_until(in, size, i, "\n"); //skip to endline + if(in[i] != '\n') // has another arg + throw PARSE_ERROR("Additionnal argument after text input delimitor", i); + + i++; + uint32_t j=i; + char* tc=NULL; + tc = (char*) strstr(in+i, std::string("\n"+delimitor+"\n").c_str()); // find delimitor + if(tc!=NULL) // delimitor was found + { + i = (tc-in)+delimitor.size()+1; + } + else + { + i = size; + // maybe at end of file with no \n + // if(strstr(in+size-delimitor.size(), std::string("\n"+delimitor).c_str())!=NULL) + // i = size-delimitor.size(); + // else // not found: end of file + } + std::string tmpparse=std::string(in+j, i-j); + auto pval = parse_arg(tmpparse.c_str(), tmpparse.size(), 0, NULL); + ret->target = pval.first; + ret->target->sa.insert(ret->target->sa.begin(), new string_subarg(delimitor+"\n")); + } + else + { + auto pa = parse_arg(in, size, i); + ret->target = pa.first; + i=pa.second; + } + } + } + catch(ztd::format_error& e) + { + if(ret!=nullptr) + delete ret; + throw e; + } + return std::make_pair(ret, i); + } + else + return std::make_pair(nullptr, start); +} + // parse one list of arguments (a command for instance) // must start at a read char // first char has to be read // ends at either &|;\n#() -std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false) +std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false, std::vector* redirs=nullptr) { uint32_t i=start; - arglist* ret = new arglist; + arglist* ret = nullptr; try { @@ -395,9 +528,27 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 } while(iargs.push_back(pp.first); - i = skip_chars(in, size, pp.second, SPACES); + if(redirs!=nullptr) + { + auto pr = parse_redirect(in, size, i); + if(pr.first != nullptr) + { + redirs->push_back(pr.first); + i=pr.second; + } + else + goto argparse; + } + else + { +argparse: + if(ret == nullptr) + ret = new arglist; + auto pp=parse_arg(in, size, i); + ret->args.push_back(pp.first); + i = pp.second; + } + i = skip_chars(in, size, i, SPACES); if(i>=size) return std::make_pair(ret, i); if( is_in(in[i], SPECIAL_TOKENS) ) @@ -406,7 +557,8 @@ std::pair parse_arglist(const char* in, uint32_t size, uint3 } catch(ztd::format_error& e) { - delete ret; + if(ret != nullptr) + delete ret; throw e; } return std::make_pair(ret, i); @@ -730,7 +882,7 @@ std::pair parse_function(const char* in, uint32_t size, uin auto pp=parse_list_until(in, size, i, '}'); if(pp.first->size()<=0) - throw PARSE_ERROR("Condition is empty", i); + throw PARSE_ERROR("Function is empty", i); ret->lst=pp.first; i=pp.second; @@ -769,7 +921,7 @@ std::pair parse_cmd(const char* in, uint32_t size, uint32_t star if(!is_in(in[i], SPECIAL_TOKENS)) { - auto pp=parse_arglist(in, size, i, true); + auto pp=parse_arglist(in, size, i, true, &ret->redirs); ret->args = pp.first; i = pp.second; } @@ -1104,16 +1256,31 @@ std::pair parse_block(const char* in, uint32_t size, uint32_t if(ret->type != block::block_cmd) { - auto pp=parse_arglist(in, size, i, false); // in case of redirects - if(pp.first->args.size()>0) - { - i = pp.second; - ret->redirs=pp.first; - } - else + // while(true) + // { + // auto pr=parse_redirect(in, size, i); + // if(pr.first == nullptr) + // break; + // ret->redirs.push_back(pr.first); + // i = pr.second; + // } + uint32_t j=skip_chars(in, size, i, SPACES); + auto pp=parse_arglist(in, size, j, false, &ret->redirs); // in case of redirects + if(pp.first != nullptr) { delete pp.first; + throw PARSE_ERROR("Extra argument after block", i); } + i=pp.second; + // if(pp.first->args.size()>0) + // { + // i = pp.second; + // ret->redirs=pp.first; + // } + // else + // { + // delete pp.first; + // } } } catch(ztd::format_error& e) diff --git a/src/resolve.cpp b/src/resolve.cpp index 3ee9460..1473158 100644 --- a/src/resolve.cpp +++ b/src/resolve.cpp @@ -380,7 +380,8 @@ bool resolve_recurse(_obj* o, shmain* parent) resolve_arg(it.second, parent, true); // force quoted resolve(it.second, parent); } - resolve(t->redirs, parent); + for(auto it: t->redirs) + resolve(it, parent); resolve(t->args, parent); return false; }; break;