#include "parse.hpp" #include #include #include #include "util.hpp" std::string g_origin; inline bool is_in(char c, const char* set) { return strchr(set, c) != NULL; } inline bool is_alphanum(char c) { return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z') || (c >= '0' && c<='9'); } inline bool is_alpha(char c) { return (c >= 'a' && c<='z') || (c >= 'A' && c<='Z'); } bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL) { uint32_t wordsize=strlen(word); if(wordsize > size-start) return false; if(strncmp(word, in+start, wordsize) == 0) { if(end_set==NULL) return true; // end set if(wordsize < size-start) return is_in(in[start+wordsize], end_set); } return false; } std::string get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) { uint32_t i=start; while(i parse_subshell(const char* in, uint32_t size, uint32_t start); // parse one argument // must start at a read char // ends at either " \t|&;\n()" std::pair parse_arg(const char* in, uint32_t size, uint32_t start) { arg ret; // j : start of subarg uint32_t i=start,j=start,q=start; if(is_in(in[i], "&|;\n#()")) throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); while(i") && in[i+1]=='&') // special case for <& and >& { i+=2; } else if(in[i]=='\\') // backslash: don't check next char { i++; if(i>=size) break; i++; } else if(in[i] == '"') // start double quote { q=i; i++; while(in[i] != '"') // while inside quoted string { if(in[i] == '\\') // backslash: don't check next char { i+=2; } else if( word_eq("$(", in, size, i) ) // substitution { // add string subarg ret.sa.push_back(subarg(std::string(in+j, i-j))); i+=2; // add subshell subarg auto r=parse_subshell(in, size, i); ret.sa.push_back(subarg(r.first)); j = i = r.second; } else i++; if(i>=size) throw ztd::format_error("Unterminated double quote", g_origin, in, q); } i++; } else if(in[i] == '\'') // start single quote { q=i; i++; while(i=size) throw ztd::format_error("Unterminated single quote", g_origin, in, q); i++; } else if( word_eq("$(", in, size, i) ) // substitution { ret.sa.push_back(subarg(std::string(in+j, i-j))); i+=2; auto r=parse_subshell(in, size, i); ret.sa.push_back(subarg(r.first)); j = i = r.second; } else i++; } // add string subarg std::string val=std::string(in+j, i-j); ret.sa.push_back(subarg(val)); // raw string for other uses ret.raw = std::string(in+start, i-start); return std::make_pair(ret, i); } // parse one list of arguments (a command for instance) // must start at a read char // first char has to be read // ends at either &|;\n#() std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false) { uint32_t i=start; arglist ret; if(is_in(in[i], "&|;\n#(){}")) { if(hard_error) throw ztd::format_error( strf("Unexpected token '%c'", in[i]) , g_origin, in, i); else return std::make_pair(ret, i); } while(i=size || is_in(in[i], "&|;\n#()") ) return std::make_pair(ret, i); } return std::make_pair(ret, i); } std::pair parse_block(const char* in, uint32_t size, uint32_t start); // parse a pipeline // must start at a read char // separated by | // ends at either &;\n#) std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start) { uint32_t i=start; pipeline ret; while(i=size || is_in(in[i], "&;\n#)") ) || word_eq("||", in, size, i) ) return std::make_pair(ret, i); else if( in[i] != '|') throw ztd::format_error( strf("Unexpected token: '%c'", in[i] ), g_origin, in, i); i++; } return std::make_pair(ret, i); } // parse condition lists // must start at a read char // separated by && or || // ends at either ;\n)# std::pair parse_condlist(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); condlist ret; bool optype=AND_OP; while(i=size || is_in(in[i], ")#")) // end here exactly: used for control later { return std::make_pair(ret, i); } else if(is_in(in[i], ";\n")) // end one char after: skip them for next parse { i++; return std::make_pair(ret, i); } else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after { ret.parallel=true; i++; return std::make_pair(ret, i); } else if( word_eq("&&", in, size, i) ) // and op { i += 2; optype=AND_OP; } else if( word_eq("||", in, size, i) ) // or op { i += 2; optype=OR_OP; } else if(i=size) throw ztd::format_error( "Unexpected end of file", g_origin, in, i ); } return std::make_pair(ret, i); } // parse a subshell // must start right after the opening ( // ends at ) and nothing else std::pair parse_subshell(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); block ret(block::subshell); while(in[i] != ')') { auto pp=parse_condlist(in, size, i); ret.cls.push_back(pp.first); i = skip_unread(in, size, pp.second); if(i>=size) throw ztd::format_error("Expecting )", g_origin, in, start-1); } if(ret.cls.size()<=0) throw ztd::format_error("Subshell is empty", g_origin, in, start-1); i++; return std::make_pair(ret,i); } // parse a brace block // must start right after the opening { // ends at } and nothing else std::pair parse_brace(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_unread(in, size, start); block ret(block::brace); while(in[i] != '}') { auto pp=parse_condlist(in, size, i); ret.cls.push_back(pp.first); i = skip_unread(in, size, pp.second); if(i>=size) throw ztd::format_error("Expecting }", g_origin, in, start-1); if(is_in(in[i], ")")) throw ztd::format_error( strf("Unexpected token: '%c'", in[i]) , g_origin, in, i ); } if(ret.cls.size()<=0) throw ztd::format_error("Brace block is empty", g_origin, in, start-1); i++; return std::make_pair(ret,i); } // parse a functions // must start right after the () // then parses a brace block std::pair parse_function(const char* in, uint32_t size, uint32_t start) { block ret(block::function); uint32_t i=start; i=skip_unread(in, size, i); if(in[i] != '{') throw ztd::format_error("Expecting { after ()", g_origin, in, i); i++; auto pp = parse_brace(in, size, i); ret.cls = pp.first.cls; i=pp.second; return std::make_pair(ret, i); } std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) { block ret(block::cmd); uint32_t i=start; // parse first arg and keep it auto tp=parse_arg(in, size, i); i=skip_unread(in, size, tp.second); if(word_eq("()", in, size, i)) // is a function { i += 2; auto pp = parse_function(in, size, i); // first arg is function name pp.first.shebang = tp.first.raw; return pp; } else // is a command { auto pp=parse_arglist(in, size, start, true); ret.args = pp.first; i = pp.second; } return std::make_pair(ret, i); } // parse a case block // must start right after the case // ends at } and nothing else std::pair parse_case(const char* in, uint32_t size, uint32_t start) { block ret(block::case_block); uint32_t i=skip_chars(in, size, start, " \t");; // get the treated argument auto pa = parse_arg(in, size, i); ret.carg = pa.first; i=skip_unread(in, size, pa.second); // must be an 'in' if(!word_eq("in", in, size, i, " \t\n")) { std::string pp=get_word(in, size, i, " \t\n"); throw ztd::format_error("Unexpected word: '"+pp+"', expecting 'in' after case", g_origin, in, i); } i=skip_unread(in, size, i+2); // parse all cases while(i cc; pa = parse_arg(in, size, i); if(pa.first.raw == "") throw ztd::format_error("Empty case value", g_origin, in, i); cc.first = pa.first; i=skip_unread(in, size, pa.second); if(in[i] != ')') throw ztd::format_error( strf("Unexpected token '%c', expecting ')'", in[i]), g_origin, in, i ); i++; while(true) // blocks { auto pc = parse_condlist(in, size, i); cc.second.push_back(pc.first); i=pc.second; if(i+1>=size) throw ztd::format_error("Expecting ';;'", g_origin, in, i); if(in[i] == ')') throw ztd::format_error( strf("Unexpected token '%c', expecting ';;'", in[i]), g_origin, in, i ); // end of case: on same line if(in[i-1] == ';' && in[i] == ';') { i++; break; } // end of case: on new line i=skip_unread(in, size, i); if(word_eq(";;", in, size, i)) { i+=2; break; } // end of block: ignore missing ;; if(word_eq("esac", in, size, i)) break; } i=skip_unread(in, size, i); ret.cases.push_back(cc); } // ended before finding esac if(i>=size) throw ztd::format_error("Expecting 'esac'", g_origin, in, i); i+=4; return std::make_pair(ret, i); } // detect if brace, subshell, case or other std::pair parse_block(const char* in, uint32_t size, uint32_t start) { uint32_t i = skip_chars(in, size, start, " \n\t"); if(i>=size) throw ztd::format_error("Unexpected end of file", g_origin, in, i); std::pair ret; if(in[i] == '{') // brace block { i++; ret = parse_brace(in, size, i); } else if(in[i] == '(') //subshell { i++; ret = parse_subshell(in, size, i); } else if(word_eq("case", in, size, i)) { ret = parse_case(in, size, i+4); } else // command { ret = parse_cmd(in, size, i); } if(ret.first.args.args.size()<=0) { auto pp=parse_arglist(in, size, ret.second, false); // in case of redirects ret.second=pp.second; ret.first.args=pp.first; } return ret; } // parse main block parse(const char* in, uint32_t size) { block ret(block::main); uint32_t i=0; // get shebang if(word_eq("#!", in, size, 0)) { i=skip_until(in, size, 0, "\n"); ret.shebang=std::string(in, i); } i = skip_unread(in, size, i); // parse all commands while(i