// Hand-written parser for shell script text. Most parse_* functions below take the input
// buffer `in`, its length `size` and a start index, and return the object they built
// together with the index at which they stopped. Syntax errors are thrown as
// ztd::format_error through PARSE_ERROR, which relies on a variable named `in` being in scope.
// NOTE(review): this copy looks damaged -- line breaks are collapsed (so every `//` comment
// swallows the code that follows it on the same physical line) and text seems to be missing
// wherever an opening angle bracket was followed by a later closing one (include targets,
// template arguments, several loop conditions and bodies). Restore it from version control
// before building; the inline NOTE(review) remarks mark the spots that were noticed.
// NOTE(review): the catch blocks rethrow with `throw e;`, which throws a copy of the caught
// object; a bare `throw;` would rethrow the original -- confirm which is wanted.
#include "parse.hpp" #include #include #include #include "util.hpp" #define ORIGIN_NONE "" #define PARSE_ERROR(str, i) ztd::format_error(str, "", in, i) const std::vector all_reserved_words = { "if", "then", "else", "fi", "case", "esac", "for", "while", "do", "done", "{", "}" }; const std::vector out_reserved_words = { "then", "else", "fi", "esac", "do", "done", "}" }; std::string g_expecting; /* Returns ", expecting 'WORD'" for a non-empty word and "" otherwise. */ std::string expecting(std::string const& word) { if(word != "") return ", expecting '"+word+"'"; else return ""; } // basic char utils /* NOTE(review): the loop in has_common_char never changes i, so unless is_in has side effects it can only return true on the first character or spin forever -- confirm. */ bool has_common_char(const char* str1, const char* str2) { uint32_t i=0; while(str1[i]!=0) { if(is_in(str1[i], str2)) return true; } return false; } inline bool is_alphanum(char c) { return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z') || (c >= '0' && c<='9'); } inline bool is_alpha(char c) { return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z'); } /* True when str starts with a-z, A-Z or '_' and every character is a-z, A-Z, 0-9 or '_'. */ bool valid_name(std::string const& str) { if(!is_alpha(str[0]) && str[0] != '_') return false; for(auto it: str) { if(! (is_alphanum(it) || it=='_' ) ) return false; } return true; } // string utils bool word_is_reserved_out(std::string const in) { for(auto it: out_reserved_words) if(in == it) return true; return false; } /* True when word occurs in `in` at index start. With a non-NULL end_set the character right after the match must also be in end_set; a match ending exactly at size then yields false. */ bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL) { uint32_t wordsize=strlen(word); if(wordsize > size-start) return false; if(strncmp(word, in+start, wordsize) == 0) { if(end_set==NULL) return true; // end set if(wordsize < size-start) return is_in(in[start+wordsize], end_set); } return false; } /* NOTE(review): text appears to be missing after "while(i" below (the rest of get_word and whatever preceded the parse_subshell declaration). */ std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) { uint32_t i=start; while(i parse_subshell(const char* in, uint32_t size, uint32_t start); std::pair parse_varname(const char* in, uint32_t size, uint32_t start) { uint32_t i=start; std::string ret; // special vars /* NOTE(review): the digit test below accepts only '0' and '1'; presumably '0'..'9' was intended -- confirm. */ if(is_in(in[i], SPECIAL_VARS) || (in[i]>='0' && in[i]<='1')) { ret=in[i]; i++; } else // varname { /* NOTE(review): text appears to be missing after "while(i" below (the rest of parse_varname and the return type of parse_arithmetic). */ while(i parse_arithmetic(const char* in, 
uint32_t size, uint32_t start) { /* Runs parse_subshell from start, discards the subshell it returns, requires a ')' at the index it stopped on, and stores i-start-1 raw input characters from start in ret->val. */ arithmetic_subarg* ret = new arithmetic_subarg; uint32_t i=start; try { auto pp=parse_subshell(in, size, i); i=pp.second; delete pp.first; if(i >= size || in[i]!=')') { throw PARSE_ERROR( "Unexpected token ')', expecting '))'", i ); } ret->val = std::string(in+start, i-start-1); i++; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } // parse one argument // must start at a read char // ends at either " \t|&;\n()" std::pair parse_arg(const char* in, uint32_t size, uint32_t start) { arg* ret = new arg; // j : start of subarg , q = start of quote uint32_t i=start,j=start,q=start; try { if(is_in(in[i], SPECIAL_TOKENS)) throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); /* NOTE(review): text appears to be missing after "while(i" below (loop condition and the start of the first branch test). */ while(i") && in[i+1]=='&') // special case for <& and >& { i+=2; } else if(in[i]=='\\') // backslash: don't check next char { i++; if(i>=size) break; i++; } else if(in[i] == '"') // start double quote { q=i; i++; while(in[i] != '"') // while inside quoted string { if(in[i] == '\\') // backslash: don't check next char { i+=2; } else if( word_eq("$((", in, size, i) ) // arithmetic operation { // add previous subarg ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=3; // get arithmetic auto r=parse_arithmetic(in, size, i); ret->sa.push_back(r.first); j = i = r.second; } else if( word_eq("$(", in, size, i) ) // substitution { // add previous subarg ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=2; // get subshell auto r=parse_subshell(in, size, i); ret->sa.push_back(new subshell_subarg(r.first, true)); j = i = r.second; } else if( in[i] == '$' ) { auto r=parse_varname(in, size, i+1); if(r.second > i+1) { // add previous subarg ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); // add varname ret->sa.push_back(new variable_subarg(r.first)); j = i = r.second; } else i++; } else i++; if(i>=size) throw PARSE_ERROR("Unterminated double quote", q); } i++; } else if(in[i] == '\'') // start 
single quote { q=i; i++; /* NOTE(review): text appears to be missing after "while(i" below. */ while(i=size) throw PARSE_ERROR("Unterminated single quote", q); i++; } else if( word_eq("$((", in, size, i) ) // arithmetic operation { // add previous subarg ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=3; // get arithmetic auto r=parse_arithmetic(in, size, i); ret->sa.push_back(r.first); j = i = r.second; } else if( word_eq("$(", in, size, i) ) // substitution { // add previous subarg ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); i+=2; // get subshell auto r=parse_subshell(in, size, i); ret->sa.push_back(new subshell_subarg(r.first, false)); j = i = r.second; } else if( in[i] == '$' ) { auto r=parse_varname(in, size, i+1); if(r.second > i+1) { // add previous subarg ret->sa.push_back(new string_subarg(std::string(in+j, i-j))); // add varname ret->sa.push_back(new variable_subarg(r.first)); j = i = r.second; } else i++; } else i++; } // add string subarg std::string val=std::string(in+j, i-j); ret->sa.push_back(new string_subarg(val)); } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } // parse one list of arguments (a command for instance) // must start at a read char // first char has to be read // ends at either &|;\n#() std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false) { uint32_t i=start; arglist* ret = new arglist; try { if(is_in(in[i], SPECIAL_TOKENS)) { if(hard_error) throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); else return std::make_pair(ret, i); } /* NOTE(review): text appears to be missing inside "while(iargs" below (loop condition and the statement that defines pp). */ while(iargs.push_back(pp.first); i = skip_chars(in, size, pp.second, SPACES); if(i>=size) return std::make_pair(ret, i); if( is_in(in[i], SPECIAL_TOKENS) ) return std::make_pair(ret, i); } } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } std::pair parse_block(const char* in, uint32_t size, uint32_t start); // parse a pipeline // must start at a read char // separated by | // ends at either &;\n#) std::pair 
parse_pipeline(const char* in, uint32_t size, uint32_t start) { uint32_t i=start; pipeline* ret = new pipeline; try { /* NOTE(review): text appears to be missing inside "i+1negated" and "while(iadd" below (the rest of the negation test, the loop condition and the statement that defines pp). */ if(in[i] == '!' && i+1negated = true; i=skip_chars(in, size, i+1, SPACES); } while(iadd(pp.first); i = skip_chars(in, size, pp.second, SPACES); if( i>=size || is_in(in[i], PIPELINE_END) || word_eq("||", in, size, i) ) return std::make_pair(ret, i); else if( in[i] != '|' ) throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i] ), i); i++; } } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } // parse condition lists // must start at a read char // separated by && or || // ends at either ;\n)# std::pair parse_condlist(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); condlist* ret = new condlist; try { bool optype=AND_OP; /* NOTE(review): text appears to be missing inside "while(iadd" below (loop condition and the statement that defines pp). */ while(iadd(pp.first, optype); i = pp.second; if(i>=size || is_in(in[i], CONTROL_END) || is_in(in[i], COMMAND_SEPARATOR)) // end here exactly: used for control later { return std::make_pair(ret, i); } else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after { ret->parallel=true; i++; return std::make_pair(ret, i); } else if( word_eq("&&", in, size, i) ) // and op { i += 2; optype=AND_OP; } else if( word_eq("||", in, size, i) ) // or op { i += 2; optype=OR_OP; } else throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i]), i); i = skip_unread(in, size, i); if(i>=size) throw PARSE_ERROR( "Unexpected end of file", i ); } } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } /* Collects condlists into a new list until in[i] equals end_c at the start of a condlist; the returned index points at end_c, which is not consumed. With end_c == 0, reaching size ends the loop normally; otherwise it throws "Expecting ..." at position start-1 (NOTE(review): start-1 wraps if start is 0). */ std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c, const char* expecting=NULL) { list* ret = new list; uint32_t i=skip_unread(in, size, start); try { while(in[i] != end_c) { auto pp=parse_condlist(in, size, i); ret->cls.push_back(pp.first); if(is_in(in[pp.second], COMMAND_SEPARATOR)) i = skip_unread(in, size, pp.second+1); else i = skip_unread(in, size, pp.second); if(i>=size) { 
if(end_c != 0) { if(expecting!=NULL) throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); else throw PARSE_ERROR(strf("Expecting '%c'", end_c), start-1); } else break; } } } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } /* Overload that stops when the word at the current position (as returned by get_word with ARG_END) equals end_word; the returned index is the one get_word reported for that word. g_expecting is set to end_word while parsing and restored only on the non-throwing path. */ std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word) { list* ret = new list; uint32_t i=skip_unread(in, size, start); try { std::string old_expect=g_expecting; g_expecting=end_word; while(true) { // check word auto wp=get_word(in, size, i, ARG_END); if(wp.first == end_word) { i=wp.second; break; } // do a parse auto pp=parse_condlist(in, size, i); ret->cls.push_back(pp.first); if(is_in(in[pp.second], COMMAND_SEPARATOR)) i = skip_unread(in, size, pp.second+1); else i = skip_unread(in, size, pp.second); // word wasn't found if(i>=size) { throw PARSE_ERROR(strf("Expecting '%s'", end_word.c_str()), start-1); } } g_expecting=old_expect; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } /* Overload accepting several end words; the tuple also carries the word that matched. An entry ";" is compared against the single character in[i] and consumes one character; g_expecting is set to end_words[0] and restored only on the non-throwing path. */ std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words, const char* expecting=NULL) { list* ret = new list; uint32_t i=skip_unread(in, size, start);; std::string found_end_word; try { std::string old_expect=g_expecting; g_expecting=end_words[0]; bool stop=false; while(true) { // check words auto wp=get_word(in, size, i, ARG_END); for(auto it: end_words) { if(it == ";" && in[i] == ';') { found_end_word=";"; i++; stop=true; break; } if(wp.first == it) { found_end_word=it; i=wp.second; stop=true; break; } } if(stop) break; // do a parse auto pp=parse_condlist(in, size, i); ret->cls.push_back(pp.first); if(is_in(in[pp.second], COMMAND_SEPARATOR)) i = skip_unread(in, size, pp.second+1); else i = skip_unread(in, size, pp.second); // word wasn't found if(i>=size) { if(expecting!=NULL) throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); else throw PARSE_ERROR(strf("Expecting '%s'", 
end_words[0].c_str()), start-1); } } g_expecting=old_expect; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_tuple(ret, i, found_end_word); } // parse a subshell // must start right after the opening ( // ends at ) and nothing else std::pair parse_subshell(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); subshell* ret = new subshell; try { auto pp=parse_list_until(in, size, start, ')'); ret->lst=pp.first; i=pp.second; if(ret->lst->size()<=0) throw PARSE_ERROR("Subshell is empty", start-1); i++; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret,i); } // parse a brace block // must start right after the opening { // ends at } and nothing else std::pair parse_brace(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); brace* ret = new brace; try { auto pp=parse_list_until(in, size, start, '}'); ret->lst=pp.first; i=pp.second; if(ret->lst->size()<=0) throw PARSE_ERROR("Brace block is empty", start-1); i++; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret,i); } // parse a function // must start right after the () // then parses a brace block std::pair parse_function(const char* in, uint32_t size, uint32_t start, const char* after="()") { uint32_t i=start; function* ret = new function; try { i=skip_unread(in, size, i); if(in[i] != '{') throw PARSE_ERROR( strf("Expecting { after %s", after) , i); i++; auto pp=parse_list_until(in, size, i, '}'); /* NOTE(review): the message below says "Condition" although it checks the function body, and pp.first is not yet stored in ret at this point, so nothing deletes it when this throws -- confirm. */ if(pp.first->size()<=0) throw PARSE_ERROR("Condition is empty", i); ret->lst=pp.first; i=pp.second; i++; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } // must start at read char std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) { cmd* ret = new cmd; uint32_t i=start; try { while(true) // parse var assigns { auto wp=get_word(in, size, i, VARNAME_END); /* NOTE(review): text appears to be missing inside "wp.secondcases" below (the rest of parse_cmd and, apparently, the start of the case parser that the following code belongs to). */ if(wp.secondcases.push_back( std::make_pair(std::vector(), 
nullptr) ); // iterator to last element auto cc = ret->cases.end()-1; // toto) while(true) { pa = parse_arg(in, size, i); cc->first.push_back(pa.first); if(pa.first->sa.size() <= 0) throw PARSE_ERROR("Empty case value", i); i=skip_unread(in, size, pa.second); if(i>=size) throw PARSE_ERROR("Unexpected end of file. Expecting 'esac'", i); if(in[i] == ')') break; if(in[i] != '|' && is_in(in[i], SPECIAL_TOKENS)) throw PARSE_ERROR( strf("Unexpected token '%c', expecting ')'", in[i]), i ); i=skip_unread(in, size, i+1); } i++; // until ;; auto tp = parse_list_until(in, size, i, {";", "esac"}, ";;"); cc->second = std::get<0>(tp); i = std::get<1>(tp); std::string word = std::get<2>(tp); if(word == "esac") { i -= 4; break; } if(i >= size) throw PARSE_ERROR("Expecting ';;'", i); if(in[i-1] != ';') throw PARSE_ERROR("Unexpected token: ';'", i); i=skip_unread(in, size, i+1); } // ended before finding esac if(i>=size) throw PARSE_ERROR("Expecting 'esac'", i); i+=4; } catch(ztd::format_error& e) { if(ret != nullptr) delete ret; throw e; } return std::make_pair(ret, i); } /* Parses an if construct; parse_block calls it with the index right after the "if" word. Each round reads a condition list up to "then" and a body up to "fi", "elif" or "else"; "elif" starts another round, "else" reads a final list up to "fi". Empty conditions or bodies are errors. */ std::pair parse_if(const char* in, uint32_t size, uint32_t start) { if_block* ret = new if_block; uint32_t i=start; try { while(true) { std::string word; ret->blocks.push_back(std::make_pair(nullptr, nullptr)); auto ll = ret->blocks.end()-1; auto pp=parse_list_until(in, size, i, "then"); ll->first = pp.first; i = pp.second; if(ll->first->size()<=0) throw PARSE_ERROR("Condition is empty", i); auto tp=parse_list_until(in, size, i, {"fi", "elif", "else"}); ll->second = std::get<0>(tp); i = std::get<1>(tp); word = std::get<2>(tp); if(std::get<0>(tp)->size() <= 0) throw PARSE_ERROR("if block is empty", i); if(word == "fi") break; if(word == "else") { auto pp=parse_list_until(in, size, i, "fi"); /* NOTE(review): pp.first is not yet stored in ret when the next line throws, so nothing deletes it -- confirm. */ if(pp.first->size()<=0) throw PARSE_ERROR("else block is empty", i); ret->else_lst=pp.first; i=pp.second; break; } } } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } /* Parses a for loop; parse_block calls it with the index right after the "for" word. Reads the variable name, an optional "in" followed by an argument list, then requires a newline, ';' or '#', the word "do", and a list terminated by "done". */ std::pair 
parse_for(const char* in, uint32_t size, uint32_t start) { for_block* ret = new for_block; uint32_t i=skip_chars(in, size, start, SPACES); try { auto wp = get_word(in, size, i, ARG_END); if(!valid_name(wp.first)) throw PARSE_ERROR( strf("Bad identifier in for clause: '%s'", wp.first.c_str()), i ); ret->varname = wp.first; i=skip_chars(in, size, wp.second, SPACES); // in wp = get_word(in, size, i, ARG_END); if(wp.first == "in") { i=skip_chars(in, size, wp.second, SPACES); auto pp = parse_arglist(in, size, i); ret->iter = pp.first; i = pp.second; } else if(wp.first != "") throw PARSE_ERROR( "Expecting 'in' after for", i ); // end of arg list if(!is_in(in[i], "\n;#")) throw PARSE_ERROR( strf("Unexpected token '%c', expecting '\\n' or ';'", in[i]), i ); if(in[i] == ';') i++; i=skip_unread(in, size, i); // do wp = get_word(in, size, i, ARG_END); if(wp.first != "do") throw PARSE_ERROR( "Expecting 'do', after for", i); i=skip_unread(in, size, wp.second); // ops auto lp = parse_list_until(in, size, i, "done"); ret->ops=lp.first; i=lp.second; } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } /* Parses a condition list terminated by "do" and a body list terminated by "done"; both must be non-empty. parse_block calls it for both the "while" and the "until" words. */ std::pair parse_while(const char* in, uint32_t size, uint32_t start) { while_block* ret = new while_block; uint32_t i=start; try { // cond auto pp=parse_list_until(in, size, i, "do"); ret->cond = pp.first; i = pp.second; if(ret->cond->size() <= 0) throw PARSE_ERROR("condition is empty", i); // ops auto lp = parse_list_until(in, size, i, "done"); ret->ops=lp.first; i = lp.second; if(ret->ops->size() <= 0) throw PARSE_ERROR("while is empty", i); } catch(ztd::format_error& e) { delete ret; throw e; } return std::make_pair(ret, i); } // detect if brace, subshell, case or other std::pair parse_block(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_chars(in, size, start, SEPARATORS); block* ret = nullptr; try { if(i>=size) throw PARSE_ERROR("Unexpected end of file", i); if( in[i] == '(' ) //subshell { auto pp = parse_subshell(in, size, 
i+1); ret = pp.first; i = pp.second; } else { auto wp=get_word(in, size, i, BLOCK_TOKEN_END); std::string word=wp.first; // reserved words if( word == "{" ) // brace block { auto pp = parse_brace(in, size, wp.second); ret = pp.first; i = pp.second; } else if(word == "case") // case { auto pp = parse_case(in, size, wp.second); ret = pp.first; i = pp.second; } else if( word == "if" ) // if { auto pp=parse_if(in, size, wp.second); ret = pp.first; i = pp.second; } else if( word == "for" ) { auto pp=parse_for(in, size, wp.second); ret = pp.first; i = pp.second; } else if( word == "while" ) { auto pp=parse_while(in, size, wp.second); ret = pp.first; i = pp.second; } else if( word == "until" ) { auto pp=parse_while(in, size, wp.second); pp.first->real_condition()->negate(); ret = pp.first; i = pp.second; } else if(word_is_reserved_out(word)) { throw PARSE_ERROR( "Unexpected '"+word+"'" + expecting(g_expecting) , i); } // end reserved words else if( word == "function" ) // bash style function { auto wp2=get_word(in, size, skip_unread(in, size, wp.second), VARNAME_END); /* NOTE(review): the message below formats word (which is the "function" keyword in this branch) rather than the rejected name wp2.first -- confirm. */ if(!valid_name(wp2.first)) throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); i=skip_unread(in, size, wp2.second); if(word_eq("()", in, size, i)) i=skip_unread(in, size, i+2); auto pp = parse_function(in, size, i, "function definition"); // function name pp.first->name = wp2.first; ret = pp.first; i = pp.second; } else if(word_eq("()", in, size, skip_unread(in, size, wp.second))) // is a function { if(!valid_name(word)) throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); auto pp = parse_function(in, size, skip_unread(in, size, wp.second)+2); // first arg is function name pp.first->name = word; ret = pp.first; i = pp.second; } else // is a command { auto pp = parse_cmd(in, size, i); ret = pp.first; i = pp.second; } } if(ret->type != block::block_cmd) { auto pp=parse_arglist(in, size, i, false); // in case of redirects if(pp.first->args.size()>0) { i = pp.second; 
ret->redirs=pp.first; } else { delete pp.first; } } } catch(ztd::format_error& e) { if(ret != nullptr) delete ret; throw e; } return std::make_pair(ret,i); } // parse main /* Builds a shmain for a whole script: stores filename, keeps a leading "#!" line (up to the first newline) as shebang, then parses every command up to end of input. Parse errors are rethrown as a ztd::format_error that carries filename. */ shmain* parse_text(const char* in, uint32_t size, std::string const& filename) { shmain* ret = new shmain(); uint32_t i=0; try { ret->filename=filename; // get shebang if(word_eq("#!", in, size, 0)) { i=skip_until(in, size, 0, "\n"); ret->shebang=std::string(in, i); } i = skip_unread(in, size, i); // parse all commands auto pp=parse_list_until(in, size, i, 0); ret->lst=pp.first; i=pp.second; } catch(ztd::format_error& e) { delete ret; throw ztd::format_error(e.what(), filename, e.data(), e.where()); } return ret; } // import a file's contents into a string std::string import_file(std::string const& path) { std::ifstream st(path); if(!st) throw std::runtime_error("Cannot open stream to '"+path+'\''); std::string ret, ln; while(getline(st, ln)) { ret += ln + '\n'; } st.close(); return ret; }