diff --git a/Makefile b/Makefile index 91049f4..c7f6dc5 100644 --- a/Makefile +++ b/Makefile @@ -14,65 +14,76 @@ LDFLAGS = -lpthread # compiler CC=g++ # compiler flags -CXXFLAGS= -I$(IDIR) -Wall -pedantic -std=c++20 +CXXFLAGS= -I$(IDIR) -Wall -std=c++20 ifeq ($(DEBUG),true) - # debugging flags - CC=clang++ - CXXFLAGS += -g -pg -D NO_PARSE_CATCH + # debugging flags + CXXFLAGS += -g -D DEBUG_MODE + RODIR = $(ODIR)/debug else - # release flags - CXXFLAGS += -Ofast + # release flags + CXXFLAGS += -Ofast + RODIR = $(ODIR)/release +endif +ifeq ($(STATIC),true) + # static links + LDFLAGS += -l:libztd.a +else + # dynamic links + LDFLAGS += -lztd +endif + + +ifeq ($(PROFILE),true) + CXXFLAGS += -pg endif ifneq ($(RELEASE), true) VSUFFIX=-dev-$(SHA_SHORT) endif -ifeq ($(STATIC),true) - # static links - LDFLAGS += -l:libztd.a -else - # dynamic links - LDFLAGS += -lztd -endif - ## END CONFIG ## + $(shell ./generate_version.sh) $(shell ./generate_shellcode.sh) -$(shell mkdir -p $(ODIR)) + +$(shell mkdir -p $(RODIR)) $(shell mkdir -p $(BINDIR)) # automatically find .h and .hpp -DEPS = $(shell find $(IDIR) -type f -regex '.*\.hp?p?' ! -name 'g_version.h' ! -name 'g_shellcode.h') +DEPS = $(shell find $(IDIR) -type f -regex '.*\.hp?p?') # automatically find .c and .cpp and make the corresponding .o rule -OBJ = $(shell find $(SRCDIR) -type f -regex '.*\.cp?p?' | sed 's|\.cpp|.o|g;s|\.c|.o|g;s|^$(SRCDIR)/|$(ODIR)/|g') +OBJ = $(shell find $(SRCDIR) -type f -regex '.*\.cp?p?' 
| sed 's|\.cpp|.o|g;s|\.c|.o|g;s|^$(SRCDIR)/|$(RODIR)/|g') -build: lxsh $(OBJ) $(DEPS) +build: $(BINDIR)/$(NAME) -$(ODIR)/%.o: $(SRCDIR)/%.c $(DEPS) +# specific files for autogenerated headers +$(OBJDIR)/options.o: $(SRCDIR)/options.cpp $(DEPS) $(IDIR)/g_version.h $(CC) $(CXXFLAGS) -c -o $@ $< -$(ODIR)/%.o: $(SRCDIR)/%.cpp $(DEPS) +$(OBJDIR)/shellcode.o: $(SRCDIR)/shellcode.cpp $(DEPS) $(IDIR)/g_shellcode.h $(CC) $(CXXFLAGS) -c -o $@ $< -$(ODIR)/options.o: $(SRCDIR)/options.cpp $(DEPS) $(IDIR)/g_version.h +$(OBJDIR)/debashify.o: $(SRCDIR)/debashify.cpp $(DEPS) $(IDIR)/g_shellcode.h $(CC) $(CXXFLAGS) -c -o $@ $< -$(ODIR)/shellcode.o: $(SRCDIR)/shellcode.cpp $(DEPS) $(IDIR)/g_shellcode.h +# generic files + +$(RODIR)/%.o: $(SRCDIR)/%.c $(DEPS) $(CC) $(CXXFLAGS) -c -o $@ $< -$(ODIR)/debashify.o: $(SRCDIR)/debashify.cpp $(DEPS) $(IDIR)/g_shellcode.h +$(RODIR)/%.o: $(SRCDIR)/%.cpp $(DEPS) $(CC) $(CXXFLAGS) -c -o $@ $< -lxsh: $(OBJ) + +$(BINDIR)/$(NAME): $(OBJ) $(CC) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) test: $(BINDIR)/$(NAME) $(BINDIR)/$(NAME) clean: - rm $(ODIR)/*.o gmon.out + rm $(ODIR)/*/*.o clear: rm $(BINDIR)/$(NAME) diff --git a/README.md b/README.md index e0ded7b..970ce19 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Extended shell linker for linking, processing and minifying shell code ### zpkg -Available from the `zpkg` repository: +Available from the [zpkg](https://github.com/zawwz/zpkg) repository: ```shell wget -qO- https://zpkg.zawz.net/install.sh | sh zpkg install lxsh @@ -20,7 +20,7 @@ Download the `lxsh.tar.gz` archive, extract it, and move the `lxsh` binary in a PATH folder (`/usr/local/bin` is the recommended). ```shell -wget https://github.com/zawwz/lxsh/releases/download/v1.1.0/lxsh.tar.gz +wget https://github.com/zawwz/lxsh/releases/download/v1.2.0/lxsh-linux-amd64.tar.gz tar -xvf lxsh.tar.gz sudo mv lxsh /usr/local/bin ``` @@ -122,6 +122,23 @@ these features will continue working with undesired behavior. 
Array argument with `[@]` does not expand into the desired multiple arguments. +## Extension commands + +If you use the `#!/usr/bin/lxsh` shebang, you can use special lxsh-defined commands. +To list such commands, run `lxsh --help-extend-fcts`. + +## String processors + +You can use prefixes in single-quoted strings to apply processing to the string contents.
+To use string processors, prefix the string content with a line in the form of `#PROCESSOR_NAME`. +Example: +```shell +sh -c '#LXSH_PARSE_MINIFY +printf "%s\n" "Hello world!"' +``` + +As of now only the processor `LXSH_PARSE_MINIFY` is implemented, but more may come later. + ## Other features ### Output generated code @@ -154,7 +171,7 @@ Depends on [ztd](https://github.com/zawwz/ztd) ## Building -Use `make -j13` to build.
+Use `make -j` to build.
You can use environment variables to alter some aspects: - DEBUG: when set to `true` will generate a debug binary with profiling - RELEASE: when set to `true`, the version string will be generated for release format diff --git a/include/debashify.hpp b/include/debashify.hpp index 8c56b16..7fb96d0 100644 --- a/include/debashify.hpp +++ b/include/debashify.hpp @@ -6,13 +6,13 @@ #include #include -typedef struct debashify_params { +struct debashify_params { std::set required_fcts; void require_fct(std::string const& in) { required_fcts.insert(in); } // map of detected arrays // bool value: is associative std::map arrays; -} debashify_params; +}; bool r_debashify(_obj* o, debashify_params* params); diff --git a/include/errcodes.h b/include/errcodes.h index 25ce563..cf48b04 100644 --- a/include/errcodes.h +++ b/include/errcodes.h @@ -1,9 +1,9 @@ #ifndef ERRCODES_H #define ERRCODES_H -#define ERR_HELP 1001 -#define ERR_OPT 1002 -#define ERR_PARSE 1003 -#define ERR_RUNTIME 1004 +#define ERR_HELP 101 +#define ERR_OPT 102 +#define ERR_PARSE 103 +#define ERR_RUNTIME 104 #endif //ERRCODES_H diff --git a/include/exec.hpp b/include/exec.hpp index 109f2b3..b848675 100644 --- a/include/exec.hpp +++ b/include/exec.hpp @@ -2,11 +2,11 @@ #define EXEC_HPP #include "options.hpp" +#include "parse.hpp" -void parse_exec(FILE* fd, const char* in, uint32_t size, std::string const& filename=""); -inline void parse_exec(FILE* fd, std::string const& in, std::string const& filename="") { parse_exec(fd, in.c_str(), in.size(), filename); } +void parse_exec(FILE* fd, parse_context ct); -int exec_process(std::string const& runtime, std::vector const& args, std::string const& filecontents, std::string const& file); +int exec_process(std::string const& runtime, std::vector const& args, parse_context ct); #endif //EXEC_HPP diff --git a/include/parse.hpp b/include/parse.hpp index 6f9651c..9a49959 100644 --- a/include/parse.hpp +++ b/include/parse.hpp @@ -6,14 +6,14 @@ #include #include #include - 
-#include +#include #define SPACES " \t" #define SEPARATORS " \t\n" #define ARG_END " \t\n;#()&|<>" #define VARNAME_END " \t\n;#()&|=\"'\\{}/-+" #define BLOCK_TOKEN_END " \t\n;#()&|=\"'\\" +#define BASH_BLOCK_END " \t\n;#()&|=\"'\\{}" #define COMMAND_SEPARATOR "\n;" #define CONTROL_END "#)" #define PIPELINE_END "\n;#()&" @@ -27,59 +27,94 @@ #define ARRAY_ARG_END " \t\n;#()&|<>]" // macros -#define PARSE_ERROR(str, i) ztd::format_error(str, "", in, i) +// #define PARSE_ERROR_I(str, ctx, i) format_error(str, ctx.filename, ctx.data, i) +// #define PARSE_ERROR(str, ctx) format_error(str, ctx.filename, ctx.data, ctx.i) +// #define PARSE_ERROR_I(str, ctx, i) { printFormatError(format_error(str, ctx.filename, ctx.data, i)); ctx.has_errored=true; } +// #define PARSE_ERROR(str, ctx) { printFormatError(format_error(str, ctx.filename, ctx.data, ctx.i)); ctx.has_errored=true; } + +// structs + +struct list_parse_options { + char end_char=0; + bool word_mode=false; + std::vector end_words={}; + const char* expecting=NULL; +}; // globals -extern bool g_bash; - extern const std::vector posix_cmdvar; extern const std::vector bash_cmdvar; std::string import_file(std::string const& path); -shmain* parse_text(const char* in, uint32_t size, std::string const& filename=""); -inline shmain* parse_text(std::string const& in, std::string const& filename="") { return parse_text(in.c_str(), in.size(), filename); } -inline shmain* parse(std::string const& file) { return parse_text(import_file(file), file); } +std::pair parse_text(parse_context context); +std::pair parse_text(std::string const& in, std::string const& filename=""); +inline std::pair parse(std::string const& file) { return parse_text(import_file(file), file); } + +// tools + +parse_context make_context(std::string const& in, std::string const& filename="", bool bash=false); +parse_context make_context(parse_context ctx, std::string const& in="", std::string const& filename="", bool bash=false); +parse_context 
make_context(parse_context ctx, uint64_t i); +parse_context operator+(parse_context ctx, int64_t a); +parse_context operator-(parse_context ctx, int64_t a); + +// error handlers +void parse_error(std::string const& message, parse_context& ctx); +void parse_error(std::string const& message, parse_context& ctx, uint64_t i); // ** unit parsers ** // /* util parsers */ -bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL); -std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set); +uint32_t word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set=NULL); +inline bool word_eq(const char* word, parse_context const& ct, const char* end_set=NULL) { + return word_eq(word, ct.data, ct.size, ct.i, end_set); +} +std::pair get_word(parse_context ct, const char* end_set); uint32_t skip_chars(const char* in, uint32_t size, uint32_t start, const char* set); +inline uint32_t skip_chars(parse_context const& ct, const char* set) { + return skip_chars(ct.data, ct.size, ct.i, set); +} uint32_t skip_until(const char* in, uint32_t size, uint32_t start, const char* set); +inline uint32_t skip_until(parse_context const& ct, const char* set) { + return skip_until(ct.data, ct.size, ct.i, set); +} uint32_t skip_unread(const char* in, uint32_t size, uint32_t start); +inline uint32_t skip_unread(parse_context const& ct) { + return skip_unread(ct.data, ct.size, ct.i); +} // list -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c, const char* expecting=NULL); -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word); -std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words, const char* expecting=NULL); +// std::pair parse_list_until(parse_context ct, char end_c, const char* expecting=NULL); +// std::pair parse_list_until(parse_context ct, std::string 
const& end_word); +// std::tuple parse_list_until(parse_context ct, std::vector const& end_words, const char* expecting=NULL); +std::tuple parse_list_until(parse_context ct, list_parse_options opts={}); // name -std::pair parse_var(const char* in, uint32_t size, uint32_t start, bool specialvars=true, bool array=false); +std::pair parse_var(parse_context ct, bool specialvars=true, bool array=false); // subarg parsers -std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start); -std::pair parse_manipulation(const char* in, uint32_t size, uint32_t start); +std::pair parse_arithmetic(parse_context ct); +std::pair parse_manipulation(parse_context ct); // arg parser -std::pair parse_arg(const char* in, uint32_t size, uint32_t start, const char* end=ARG_END, const char* unexpected=SPECIAL_TOKENS, bool doquote=true); +std::pair parse_arg(parse_context ct, const char* end=ARG_END, const char* unexpected=SPECIAL_TOKENS, bool doquote=true); // redirect parser -std::pair parse_redirect(const char* in, uint32_t size, uint32_t start); +std::pair parse_redirect(parse_context ct); // arglist parser -std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error=false, std::vector* redirs=nullptr); +std::pair parse_arglist(parse_context ct, bool hard_error=false, std::vector* redirs=nullptr); // block parsers -std::pair parse_block(const char* in, uint32_t size, uint32_t start); -std::pair parse_cmd(const char* in, uint32_t size, uint32_t start); -std::pair parse_function(const char* in, uint32_t size, uint32_t start, const char* after="()"); -std::pair parse_subshell(const char* in, uint32_t size, uint32_t start); -std::pair parse_brace(const char* in, uint32_t size, uint32_t start); -std::pair parse_case(const char* in, uint32_t size, uint32_t start); -std::pair parse_if(const char* in, uint32_t size, uint32_t start); -std::pair parse_for(const char* in, uint32_t size, uint32_t start); -std::pair parse_while(const char* in, uint32_t size, 
uint32_t start); +std::pair parse_block(parse_context ct); +std::pair parse_cmd(parse_context ct); +std::pair parse_function(parse_context ct, const char* after="()"); +std::pair parse_subshell(parse_context ct); +std::pair parse_brace(parse_context ct); +std::pair parse_case(parse_context ct); +std::pair parse_if(parse_context ct); +std::pair parse_for(parse_context ct); +std::pair parse_while(parse_context ct); // pipeline parser -std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start); +std::pair parse_pipeline(parse_context ct); // condlist parser -std::pair parse_condlist(const char* in, uint32_t size, uint32_t start); +std::pair parse_condlist(parse_context ct); #endif //PARSE_HPP diff --git a/include/processing.hpp b/include/processing.hpp index d3f54a1..26ab7d6 100644 --- a/include/processing.hpp +++ b/include/processing.hpp @@ -76,4 +76,6 @@ bool r_delete_var(_obj* in, set_t* vars); std::set find_lxsh_commands(shmain* sh); void add_unset_variables(shmain* sh, std::regex const& exclude); +void string_processors(_obj* in); + #endif //PROCESSING_HPP diff --git a/include/resolve.hpp b/include/resolve.hpp index d93a3e6..360506a 100644 --- a/include/resolve.hpp +++ b/include/resolve.hpp @@ -2,16 +2,16 @@ #define RESOLVE_HPP #include "struc.hpp" +#include "parse.hpp" extern std::vector included; -std::vector> do_include_raw(condlist* cmd, std::string const& filename, std::string* ex_dir=nullptr); -std::pair do_resolve_raw(condlist* cmd, std::string const& filename, std::string* ex_dir=nullptr); +std::vector> do_include_raw(condlist* cmd, parse_context ctx, std::string* ex_dir=nullptr); +std::pair do_resolve_raw(condlist* cmd, parse_context ctx, std::string* ex_dir=nullptr); bool add_include(std::string const& file); -void resolve(_obj* sh, std::string* filename); -void resolve(shmain* sh); +void resolve(_obj* sh, parse_context ctx); std::string _pre_cd(std::string const& filename); void _cd(std::string const& dir); diff --git 
a/include/shellcode.hpp b/include/shellcode.hpp index aec424f..b6ab058 100644 --- a/include/shellcode.hpp +++ b/include/shellcode.hpp @@ -15,9 +15,9 @@ struct lxsh_fct { std::vector depends_on=std::vector(); }; -extern const std::map lxsh_extend_fcts; -extern const std::map lxsh_array_fcts; -extern const std::map lxsh_allfcts; +extern const std::map lxsh_extend_fcts; +extern const std::map lxsh_array_fcts; +extern const std::map lxsh_allfcts; void add_lxsh_fcts(shmain* sh, std::set fcts); diff --git a/include/struc.hpp b/include/struc.hpp index 7ffec33..dabb156 100644 --- a/include/struc.hpp +++ b/include/struc.hpp @@ -62,6 +62,8 @@ subarg: can be one of */ +// pre-definitions + #define AND_OP false #define OR_OP true @@ -71,6 +73,56 @@ class pipeline; class arg; class subarg; class cmd; +class redirect; + +// structs + +struct parse_context { + const char* data=NULL; + uint64_t size=0; + uint64_t i=0; + const char* filename=""; + bool bash=false; + const char* expecting=""; + const char* here_delimiter=""; + const char* here_doc=""; + const char operator[](uint64_t a) { return data[a]; } + bool has_errored=false; + redirect* here_document=nullptr; + char* here_delimitor=NULL; +}; + +struct generate_context { + arg* here_document=nullptr; +}; + +// exceptions + +class format_error : public std::exception +{ +public: + //! @brief Conctructor + inline format_error(const std::string& what, const std::string& origin, const std::string& data, int where, std::string level="error") { desc=what; index=where; filename=origin; sdat=data; severity=level; } + inline format_error(const std::string& what, parse_context const& ctx, std::string level="error") { desc=what; index=ctx.i; filename=ctx.filename; sdat=ctx.data; severity=level; } + //! @brief Error message + inline const char * what () const throw () {return desc.c_str();} + //! 
@brief Origin of the data, name of imported file, otherwise empty if generated + inline const char * origin() const throw () {return filename.c_str();} + //! @brief Data causing the exception + inline const char * data() const throw () {return sdat.c_str();} + //! @brief Severity of the exception + inline const std::string level() const throw () {return severity.c_str();} + //! @brief Where the error is located in the data + inline const int where () const throw () {return index;} +private: + std::string desc; + int index; + std::string filename; + std::string sdat; + std::string severity; +}; + +// objects // type pack of condlist typedef std::vector arglist_t; @@ -135,11 +187,15 @@ public: std::vector sa; + bool is_string(); // return if is a string and only one subarg std::string string(); // return if the first subarg is a string std::string first_sa_string(); + // can expand into multiple arguments + bool can_expand(); + inline bool equals(std::string const& in) { return this->string() == in; } std::string generate(int ind); @@ -179,6 +235,9 @@ public: std::vector strargs(uint32_t start); + // potentially expands into more arguments than its size + bool can_expand(); + void insert(uint32_t i, arg* val); void insert(uint32_t i, arglist const& lst); @@ -190,15 +249,20 @@ public: class redirect : public _obj { public: - redirect(std::string strop="") { type=_obj::_redirect; op=strop; target=nullptr; } - redirect(arg* in) { type=_obj::_redirect; target=in; } - redirect(std::string strop, arg* in) { type=_obj::_redirect; op=strop; target=in; } - ~redirect() { if(target != nullptr) delete target; } + redirect(std::string strop="") { type=_obj::_redirect; op=strop; target=nullptr; here_document=nullptr; } + redirect(arg* in) { type=_obj::_redirect; target=in; here_document=nullptr; } + redirect(std::string strop, arg* in) { type=_obj::_redirect; op=strop; target=in; here_document=nullptr; } + redirect(std::string strop, arg* in, arg* doc) { type=_obj::_redirect; 
op=strop; target=in; here_document=doc; } + ~redirect() { + if(target != nullptr) delete target; + if(here_document != nullptr) delete here_document; + } std::string generate(int ind); std::string op; arg* target; + arg* here_document; }; // Meta block @@ -213,9 +277,9 @@ public: // subshell: return the containing cmd, if it is a single command cmd* single_cmd(); - std::string generate_redirs(int ind, std::string const& _str); + std::string generate_redirs(int ind, std::string const& _str, generate_context* ctx); - virtual std::string generate(int ind)=0; + virtual std::string generate(int ind, generate_context* ctx)=0; }; // PL @@ -230,7 +294,8 @@ public: bool negated; // negated return value (! at start) - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); }; }; // CL @@ -331,7 +396,8 @@ public: arglist* args; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class shmain : public block @@ -349,7 +415,8 @@ public: list* lst; std::string generate(bool print_shebang=true, int ind=0); - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class subshell : public block @@ -365,7 +432,8 @@ public: list* lst; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class brace : public block @@ -380,7 +448,8 @@ public: list* lst; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class function : public block @@ -394,7 +463,8 @@ public: std::string name; list* lst; - std::string generate(int ind); + std::string generate(int 
ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class case_block : public block @@ -414,7 +484,8 @@ public: arg* carg; std::vector< std::pair, list*> > cases; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class if_block : public block @@ -434,7 +505,8 @@ public: list* else_lst; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class for_block : public block @@ -452,7 +524,8 @@ public: arglist* iter; list* ops; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; class while_block : public block @@ -469,7 +542,8 @@ public: list* cond; list* ops; - std::string generate(int ind); + std::string generate(int ind, generate_context* ctx); + std::string generate(int ind) { return this->generate(ind, nullptr); } }; // Subarg subtypes // diff --git a/include/util.hpp b/include/util.hpp index 53c6f4c..4c3b434 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -12,8 +12,6 @@ #include #include -#include - #include "struc.hpp" extern std::string indenting_string; @@ -149,9 +147,6 @@ int _exec(std::string const& bin, std::vector const& args); std::string stringReplace(std::string subject, const std::string& search, const std::string& replace); -void printFormatError(ztd::format_error const& e, bool print_line=true); -void printErrorIndex(const char* in, const int index, const std::string& message, const std::string& origin, bool print_line=true); - -int execute(shmain* sh, std::vector& args); +void printFormatError(format_error const& e, bool print_line=true); #endif //UTIL_HPP diff --git a/include/version.h b/include/version.h index eb50c34..686ed20 100644 --- 
a/include/version.h +++ b/include/version.h @@ -1,6 +1,6 @@ #ifndef VERSION_H #define VERSION_H -#define VERSION_STRING "v1.1.0" +#define VERSION_STRING "v1.2.0" #endif //VERSION_H diff --git a/src/debashify.cpp b/src/debashify.cpp index aefea4d..ea89848 100644 --- a/src/debashify.cpp +++ b/src/debashify.cpp @@ -1,6 +1,6 @@ #include "debashify.hpp" -#include "ztd/options.hpp" +#include #include "processing.hpp" #include "recursive.hpp" @@ -144,12 +144,11 @@ std::string get_declare_opt(cmd* in) ztd::option_set gen_echo_opts() { - ztd::option_set ret; - ret.add( - ztd::option('e'), - ztd::option('E'), - ztd::option('n') - ); + ztd::option_set ret( std::vector({ + ztd::option('e'), + ztd::option('E'), + ztd::option('n') + }) ); return ret; } @@ -166,60 +165,124 @@ bool debashify_echo(pipeline* pl) ztd::option_set opts=gen_echo_opts(); std::vector args=in->args->strargs(1); std::vector postargs; + try { - postargs=opts.process(args, true, true); + postargs=opts.process(args, {.ignore_numbers=true, .stop_on_argument=true} ); } catch(ztd::option_error& e) { skip=true; } - if(skip || postargs.size() == args.size()) // no options processed: skip - return false; - // delete the number of args that were processed - for(uint32_t i=0; iargs->args[1]; - in->args->args.erase(in->args->args.begin()+1); - } - - bool doprintf=false; + bool enable_interpretation=false; bool newline=true; - if(opts['E']) + bool has_escape_sequence=false; + bool has_processed_options=false; + + if(!skip && postargs.size() != args.size()) { - doprintf=true; - } - else if(opts['n']) - { - doprintf=true; - newline=false; + has_processed_options=true; + // delete the number of args that were processed + for(uint32_t i=0; iargs->args[1]; + in->args->args.erase(in->args->args.begin()+1); + } + + if(opts['e']) + enable_interpretation=true; + else if(opts['n']) + newline=false; } - if(doprintf) + for(auto it=in->args->args.begin()+1; it!=in->args->args.end(); it++) { - delete in->args->args[0]; - 
in->args->args[0] = new arg("printf"); - if(possibly_expands(in->args->args[2]) ) + if(!(*it)->is_string() || (*it)->string().find('\\') != std::string::npos) { - in->args->insert(1, new arg("%s\\ ")); - if(newline) // newline: add a newline command at the end + has_escape_sequence=true; + break; + } + } + + if(newline && !has_escape_sequence) + { + // newline and no potential escape: don't replace, keep echo + return has_processed_options; + } + else + { + // replace by printf + if(!in->args->can_expand()) + { + // no potential expansion: static number of args + std::string format_str = "'"; + for(uint32_t i=1; iargs->args.size(); i++) { - brace* br = new brace(new list); - br->lst->add(new condlist(in)); - br->lst->add(make_condlist("echo")); - pl->cmds[0] = br; + if(enable_interpretation) + format_str += "%b "; + else + format_str += "%s "; } + format_str.pop_back(); + if(newline) + format_str += "\\n"; + format_str += '\''; + + in->args->insert(1, new arg(format_str)); + delete in->args->args[0]; + in->args->args[0] = new arg("printf"); } else { - std::string printfarg="'%s"; - for(uint32_t i=2; iargs->size(); i++) - printfarg+=" %s"; + std::string format_str; + if(enable_interpretation) + format_str = "%b"; + else + format_str = "%s"; + + list* lst=nullptr; + + // more than 1 arg and first arg can't expand: can split into two printf + // printf '%s' arg1 + // printf ' %s' args... 
+ if(in->args->args.size()>2 && !in->args->args[1]->can_expand()) + { + // extract arg 1 + arg* arg1 = in->args->args[1]; + in->args->args.erase(in->args->args.begin()+1); + delete in->args->args[0]; + in->args->args[0] = new arg("printf"); + + lst = new list; + lst->add(new condlist(make_cmd({new arg("printf"), new arg(format_str+"\\ "), arg1 }))); + lst->add(new condlist(in)); + } + else + { + // can't reliable replace: keep echo if newline + if(newline) + return has_processed_options; + + in->args->insert(1, new arg(format_str+"\\ ")); + delete in->args->args[0]; + in->args->args[0] = new arg("printf"); + } + if(newline) - printfarg+="\\n"; - printfarg+="'"; - in->args->insert(1, new arg(printfarg)); + { + if(lst == nullptr) + { + lst = new list; + lst->add(new condlist(in)); + } + lst->add(make_condlist("echo")); + } + + if(lst != nullptr) + { + pl->cmds[0] = new brace(lst); + } } } @@ -473,7 +536,7 @@ bool debashify_array_set(cmd* in, debashify_params* params) gen=gen.substr(2); gen.pop_back(); // create cmd out of arguments - arglist* args = parse_arglist( gen.c_str(), gen.size(), 0 ).first; + arglist* args = parse_arglist( make_context(gen) ).first; cmd* c = new cmd(args); // cmd first argument is _lxsh_X_create if(params->arrays[varname]) @@ -569,7 +632,7 @@ bool debashify_array_set(cmd* in, debashify_params* params) gen=gen.substr(3); gen.pop_back(); // create cmd out of arguments - arglist* args = parse_arglist( gen.c_str(), gen.size(), 0 ).first; + arglist* args = parse_arglist( make_context(gen) ).first; cmd* c = new cmd(args); // cmd first argument is _lxsh_array_create if(params->arrays[varname]) diff --git a/src/exec.cpp b/src/exec.cpp index 100dc2b..fe89834 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -18,16 +18,16 @@ #define PIPE_READ 0 #define PIPE_WRITE 1 -std::vector do_include_exec(condlist* cmd, std::string const& filename, FILE* fd) +std::vector do_include_exec(condlist* cmd, parse_context ctx, FILE* fd) { std::vector ret; std::string dir; 
- auto incs=do_include_raw(cmd, filename, &dir); + auto incs=do_include_raw(cmd, ctx, &dir); for(auto it: incs) { - parse_exec(fd, it.second, it.first); + parse_exec(fd, make_context(ctx, it.second, it.first)); } // cd back _cd(dir); @@ -36,7 +36,7 @@ std::vector do_include_exec(condlist* cmd, std::string const& filenam } // if first is nullptr: is a string -std::vector do_resolve_exec(condlist* cmd, std::string const& filename, FILE* fd) +std::vector do_resolve_exec(condlist* cmd, parse_context ctx, FILE* fd) { std::vector ret; @@ -45,15 +45,15 @@ std::vector do_resolve_exec(condlist* cmd, std::string const& filenam { // get std::string dir; - p=do_resolve_raw(cmd, filename, &dir); + p=do_resolve_raw(cmd, ctx, &dir); // do parse - parse_exec(fd, p.second, filename); + parse_exec(fd, make_context(ctx, p.second, p.first)); // cd back _cd(dir); } - catch(ztd::format_error& e) + catch(format_error& e) { - throw ztd::format_error(e.what(), '`'+p.first+'`', e.data(), e.where()); + throw format_error(e.what(), '`'+p.first+'`', e.data(), e.where()); } return ret; @@ -61,7 +61,7 @@ std::vector do_resolve_exec(condlist* cmd, std::string const& filenam // -- OBJECT CALLS -- -bool resolve_condlist_exec(condlist* in, std::string const& filename, FILE* fd) +bool resolve_condlist_exec(condlist* in, parse_context ctx, FILE* fd) { cmd* tc = in->first_cmd(); if(tc == nullptr) @@ -71,23 +71,23 @@ bool resolve_condlist_exec(condlist* in, std::string const& filename, FILE* fd) if(g_include && strcmd == "%include") { - do_include_exec(in, filename, fd); + do_include_exec(in, ctx, fd); return true; } else if(g_resolve && strcmd == "%resolve") { - do_resolve_exec(in, filename, fd); + do_resolve_exec(in, ctx, fd); return true; } return false; } -bool resolve_exec(condlist* in, std::string const& filename, FILE* fd) +bool resolve_exec(condlist* in, parse_context ctx, FILE* fd) { - if(!resolve_condlist_exec(in, filename, fd)) + if(!resolve_condlist_exec(in, ctx, fd)) { - resolve(in, 
(std::string*) &filename); + resolve(in, ctx); return false; } return true; @@ -138,60 +138,57 @@ std::string random_string() return ret; } -void parse_exec(FILE* fd, const char* in, uint32_t size, std::string const& filename) +void parse_exec(FILE* fd, parse_context ctx) { - uint32_t i=skip_unread(in, size, 0); -#ifndef NO_PARSE_CATCH - try + ctx.i=skip_unread(ctx); + + debashify_params debash_params; + list* t_lst=new list; + if(t_lst == nullptr) + throw std::runtime_error("Alloc error"); + while(ctx.iadd(pp.first); - if(g_resolve || g_include) + parse_list_until(ctx); + throw std::runtime_error("Aborting due to previous errors"); + } + t_lst->add(pp.first); + if(g_resolve || g_include) + { + if(resolve_exec(t_lst->cls[0], ctx, fd)) { - if(resolve_exec(t_lst->cls[0], filename, fd)) - { - t_lst->clear(); - continue; - } - } - if(options["debashify"]) - debashify(t_lst, &debash_params); - - - std::string gen=t_lst->generate(0); - t_lst->clear(); - - fprintf(fd, "%s", gen.c_str()); - - if(i < size) - { - if(in[i] == '#') - ; // skip here - else if(is_in(in[i], COMMAND_SEPARATOR)) - i++; // skip on next char - else if(is_in(in[i], CONTROL_END)) - throw PARSE_ERROR(strf("Unexpected token: '%c'", in[i]), i); - - i = skip_unread(in, size, i); + t_lst->clear(); + continue; } } - delete t_lst; -#ifndef NO_PARSE_CATCH -} - catch(ztd::format_error& e) - { - throw ztd::format_error(e.what(), filename, in, e.where()); + if(options["debashify"]) + debashify(t_lst, &debash_params); + + + std::string gen=t_lst->generate(0); + t_lst->clear(); + + fprintf(fd, "%s", gen.c_str()); + + if(ctx.i < ctx.size) + { + if(ctx[ctx.i] == '#') + ; // skip here + else if(is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + ctx.i++; // skip on next char + else if(is_in(ctx[ctx.i], CONTROL_END)) + { + format_error(strf("Unexpected token: '%c'", ctx[ctx.i]), ctx); + return; + } + + ctx.i = skip_unread(ctx); + } } -#endif + delete t_lst; } pid_t forkexec(const char* bin, char *const args[]) @@ -205,10 +202,6 @@ 
pid_t forkexec(const char* bin, char *const args[]) } if (child_pid == 0) // child process { - // char buf[1000] = {0}; - // read(STDIN_FILENO, buf, 1000); - // std::cout << std::string(buf) << std::endl; - // std::cout << dup2(tfd, STDIN_FILENO) << std::endl; setpgid(child_pid, child_pid); //Needed so negative PIDs can kill children of /bin/sh execv(bin, args); throw std::runtime_error("execv() failed"); @@ -234,7 +227,7 @@ int wait_pid(pid_t pid) return WEXITSTATUS(stat); } -int exec_process(std::string const& runtime, std::vector const& args, std::string const& filecontents, std::string const& file) +int exec_process(std::string const& runtime, std::vector const& args, parse_context ctx) { std::vector strargs = split(runtime, " \t"); std::vector runargs; @@ -254,8 +247,6 @@ int exec_process(std::string const& runtime, std::vector const& arg runargs.push_back(NULL); pid_t pid=0; - // std::string test="echo Hello world\nexit 10\n"; - // fprintf(ffd, "%s\n",, test.c_str(), test.size()); FILE* ffd=0; try { @@ -268,7 +259,7 @@ int exec_process(std::string const& runtime, std::vector const& arg } for(auto it: lxsh_extend_fcts) fprintf(ffd, "%s\n", it.second.code); - parse_exec(ffd, filecontents, file); + parse_exec(ffd, ctx); } catch(std::runtime_error& e) { diff --git a/src/generate.cpp b/src/generate.cpp index d5d02bd..41071b2 100644 --- a/src/generate.cpp +++ b/src/generate.cpp @@ -44,7 +44,7 @@ std::string arglist::generate(int ind) return ret; } -std::string pipeline::generate(int ind) +std::string pipeline::generate(int ind, generate_context* ctx) { std::string ret; @@ -53,11 +53,11 @@ std::string pipeline::generate(int ind) if(negated) ret += "! 
"; - ret += cmds[0]->generate(ind); + ret += cmds[0]->generate(ind, ctx); for(uint32_t i=1 ; igenerate(ind); + ret += cmds[i]->generate(ind, ctx); } return ret; @@ -68,18 +68,27 @@ std::string condlist::generate(int ind) std::string ret; if(pls.size() <= 0) return ""; - ret += pls[0]->generate(ind); + generate_context ctx; + ret += pls[0]->generate(ind, &ctx); for(uint32_t i=0 ; igenerate(ind); + ret += pls[i+1]->generate(ind, &ctx); } if(ret=="") return ""; - if(parallel) + if(ctx.here_document != nullptr) + { + if(parallel) + ret += '&'; + ret += '\n'; + ret += ctx.here_document->generate(0); + ret += '\n'; + } + else if(parallel) { ret += opt_minify ? "&" : " &\n"; } @@ -123,12 +132,18 @@ std::string redirect::generate(int ind) // BLOCK -std::string block::generate_redirs(int ind, std::string const& _str) +std::string block::generate_redirs(int ind, std::string const& _str, generate_context* ctx=nullptr) { std::string ret=" "; bool previous_isnt_num = _str.size()>0 && !is_num(_str[_str.size()-1]); for(auto it: redirs) { + if(ctx != nullptr && it->here_document != nullptr) + { + if(ctx->here_document != nullptr) + throw std::runtime_error("Unsupported generation of concurrent here documents"); + ctx->here_document = it->here_document; + } std::string _r = it->generate(0); if(opt_minify && _r.size() > 0 && !is_num(_r[0]) && previous_isnt_num) ret.pop_back(); // remove one space if possible @@ -139,7 +154,7 @@ std::string block::generate_redirs(int ind, std::string const& _str) return ret; } -std::string if_block::generate(int ind) +std::string if_block::generate(int ind, generate_context* ctx) { std::string ret; @@ -169,11 +184,11 @@ std::string if_block::generate(int ind) ret += indented("fi", ind); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string for_block::generate(int ind) +std::string for_block::generate(int ind, generate_context* ctx) { std::string ret; @@ -187,11 +202,11 @@ std::string 
for_block::generate(int ind) if(opt_minify && ret.size()>1 && !is_alpha(ret[ret.size()-2])) ret.pop_back(); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string while_block::generate(int ind) +std::string while_block::generate(int ind, generate_context* ctx) { std::string ret; @@ -207,11 +222,11 @@ std::string while_block::generate(int ind) if(opt_minify && ret.size()>1 && !is_alpha(ret[ret.size()-2])) ret.pop_back(); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string subshell::generate(int ind) +std::string subshell::generate(int ind, generate_context* ctx) { std::string ret; // open subshell @@ -224,11 +239,11 @@ std::string subshell::generate(int ind) // close subshell ret += indented(")", ind); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string shmain::generate(int ind) +std::string shmain::generate(int ind, generate_context* ctx) { return this->generate(false, ind); } @@ -241,11 +256,10 @@ std::string shmain::generate(bool print_shebang, int ind) if( opt_minify && ret[ret.size()-1] == '\n') ret.pop_back(); - ret += generate_redirs(ind, ret); return ret; } -std::string brace::generate(int ind) +std::string brace::generate(int ind, generate_context* ctx) { std::string ret; @@ -253,11 +267,11 @@ std::string brace::generate(int ind) ret += lst->generate(ind+1); ret += indented("}", ind); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string function::generate(int ind) +std::string function::generate(int ind, generate_context* ctx) { std::string ret; // function definition @@ -268,11 +282,11 @@ std::string function::generate(int ind) ret += lst->generate(ind+1); ret += indented("}", ind); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string case_block::generate(int ind) +std::string case_block::generate(int ind, 
generate_context* ctx) { std::string ret; ret += "case " + carg->generate(ind) + " in\n"; @@ -292,27 +306,30 @@ std::string case_block::generate(int ind) // end of case: ;; if(opt_minify && ret[ret.size()-1] == '\n') // ;; can be right after command ret.pop_back(); - ret += indented(";;\n", ind+1); + ret += indented(";;", ind+1); + if(!opt_minify) + ret+="\n"; } - // remove ;; from last case + // replace ;; from last case with ; if(this->cases.size()>0 && opt_minify) { - ret.erase(ret.size()-3, 2); + ret.pop_back(); } // close case ind--; ret += indented("esac", ind); - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } -std::string cmd::generate(int ind) +std::string cmd::generate(int ind, generate_context* ctx) { std::string ret; - // var assigns + + // is a varassign cmd if(is_cmdvar) { ret += args->generate(ind) + ' '; @@ -328,6 +345,7 @@ std::string cmd::generate(int ind) return ret; } + // pre-cmd var assigns for(auto it: var_assigns) { if(it.first != nullptr) @@ -337,6 +355,7 @@ std::string cmd::generate(int ind) ret += ' '; } + // cmd itself if(args!=nullptr && args->size()>0) { // command @@ -351,7 +370,7 @@ std::string cmd::generate(int ind) ret.pop_back(); } - ret += generate_redirs(ind, ret); + ret += generate_redirs(ind, ret, ctx); return ret; } diff --git a/src/main.cpp b/src/main.cpp index 775b587..289f4fa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,12 +1,13 @@ #include #include +#include + +#include #include #include -#include - #include "util.hpp" #include "struc.hpp" #include "parse.hpp" @@ -29,6 +30,7 @@ int main(int argc, char* argv[]) bool optstop=false; + shmain *sh=nullptr, *tsh=nullptr; try { args=options.process(argc, argv, {.stop_on_argument=true, .output_doubledash=true} ); @@ -37,53 +39,49 @@ int main(int argc, char* argv[]) optstop=true; args.erase(args.begin()); } - } - catch(std::exception& e) - { - std::cerr << e.what() << std::endl; - return ERR_OPT; - } - oneshot_opt_process(argv[0]); - 
// resolve input - std::string file; - if(args.size() > 0) // argument provided - { - if(args[0] == "-" || args[0] == "/dev/stdin") //stdin + oneshot_opt_process(argv[0]); + + // resolve input + std::string file; + if(args.size() > 0) // argument provided { - file = "/dev/stdin"; + if(args[0] == "-" || args[0] == "/dev/stdin") //stdin + { + file = "/dev/stdin"; + } + else + { + file=args[0]; + } } else { - file=args[0]; + if(isatty(fileno(stdin))) // stdin is interactive + { + print_help(argv[0]); + return ERR_HELP; + } + else // is piped + { + file = "/dev/stdin"; + args.push_back("/dev/stdin"); + } } - } - else - { - if(isatty(fileno(stdin))) // stdin is interactive - { - print_help(argv[0]); - return ERR_HELP; - } - else // is piped - { - file = "/dev/stdin"; - args.push_back("/dev/stdin"); - } - } - // parsing + // parsing + + sh = new shmain(new list); - shmain* sh = new shmain(new list); - shmain* tsh = nullptr; - try - { bool is_exec = false; bool first_run = true; // do parsing bool shebang_is_bin=false; + bool parse_bash=false; + parse_context ctx; + std::string binshebang; for(uint32_t i=0 ; ishebang="#!/bin/sh"; /* mid processing */ // resolve/include if(g_include || g_resolve) - resolve(tsh); + resolve(tsh, ctx); // concatenate to main sh->concat(tsh); @@ -166,11 +171,13 @@ int main(int argc, char* argv[]) list_fcts(sh, re_fct_exclude); else if(options["list-cmd"]) list_cmds(sh, regex_null); - // output +#ifdef DEBUG_MODE else if(options['J']) { std::cout << gen_json_struc(sh) << std::endl; } +#endif + // output else { // post-listing modifiers @@ -186,7 +193,10 @@ int main(int argc, char* argv[]) // processing before output // minify if(options['m']) + { opt_minify=true; + string_processors(sh); + } if(options["minify-quotes"]) minify_quotes(sh); if(options["minify-var"]) @@ -215,8 +225,7 @@ int main(int argc, char* argv[]) } } } -#ifndef NO_PARSE_CATCH - catch(ztd::format_error& e) + catch(format_error& e) { if(tsh != nullptr) delete tsh; @@ -224,12 
+233,17 @@ int main(int argc, char* argv[]) printFormatError(e); return ERR_PARSE; } -#endif + catch(ztd::option_error& e) + { + std::cerr << e.what() << std::endl; + return ERR_OPT; + } catch(std::runtime_error& e) { if(tsh != nullptr) delete tsh; - delete sh; + if(sh != nullptr) + delete sh; std::cerr << e.what() << std::endl; return ERR_RUNTIME; } diff --git a/src/minify.cpp b/src/minify.cpp index 842bf85..e046c69 100644 --- a/src/minify.cpp +++ b/src/minify.cpp @@ -69,8 +69,8 @@ bool r_replace_var(_obj* in, strmap_t* varmap) return true; } -const char* escaped_char=" \\\t!\"()|&*?~"; -const char* doublequote_escape_char=" \t'|&\\*?~"; +const char* escaped_char=" \\\t!\"()|&*?~><"; +const char* doublequote_escape_char=" \t'|&\\*?~><"; uint32_t count_escape_chars(std::string const& in, bool doublequote) { uint32_t r=0; diff --git a/src/options.cpp b/src/options.cpp index dce2c6c..443af50 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -7,55 +7,53 @@ #include "version.h" #include "g_version.h" -ztd::option_set options = gen_options(); bool opt_minify=false; +ztd::option_set options( { + ztd::option("\r [Help]"), + ztd::option('h', "help", false, "Display this help message"), + ztd::option("version", false, "Display version"), + ztd::option("help-link-commands", false, "Print help for linker commands"), + ztd::option("help-extend-fcts", false, "Print help for lxsh extension functions"), + ztd::option("\r [Output]"), + ztd::option('o', "output", true , "Output result script to file", "file"), + ztd::option('c', "stdout", false, "Output result script to stdout"), + ztd::option('e', "exec", false, "Directly execute script"), + ztd::option("no-shebang", false, "Don't output shebang"), +#ifdef DEBUG_MODE + ztd::option("\r [Debugging]"), + ztd::option('J', "json", false, "Output the json structure"), +#endif + ztd::option("\r [Processing]"), + ztd::option('m', "minify", false, "Minify code without changing functionality"), + ztd::option('M', "minify-full", false, 
"Enable all minifying features: -m --minify-quotes --minify-var --minify-fct --remove-unused"), + ztd::option("minify-quotes", false, "Remove unnecessary quotes"), + ztd::option('C', "no-cd", false, "Don't cd when doing %include and %resolve"), + ztd::option('I', "no-include", false, "Don't resolve %include commands"), + ztd::option('R', "no-resolve", false, "Don't resolve %resolve commands"), + ztd::option("no-extend", false, "Don't add lxsh extension functions"), + ztd::option("debashify", false, "Attempt to turn a bash-specific script into a POSIX shell script"), + ztd::option("remove-unused", false, "Remove unused functions and variables"), + ztd::option("list-cmd", false, "List all commands invoked in the script"), + ztd::option("\r [Variable processing]"), + ztd::option("exclude-var", true, "List of matching regex to ignore for variable processing", "list"), + ztd::option("no-exclude-reserved",false, "Don't exclude reserved variables"), + ztd::option("minify-var", false, "Minify variable names"), + ztd::option("list-var", false, "List all variables set and invoked in the script"), + ztd::option("list-var-def", false, "List all variables set in the script"), + ztd::option("list-var-call", false, "List all variables invoked in the script"), + ztd::option("unset-var", false, "Add 'unset' to all variables at the start of the script to avoid environment interference"), + ztd::option("\r [Function processing]"), + ztd::option("exclude-fct", true, "List of matching regex to ignore for function processing", "list"), + ztd::option("minify-fct", false, "Minify function names"), + ztd::option("list-fct", false, "List all functions defined in the script") +} ); + bool g_cd=false; bool g_include=true; bool g_resolve=true; bool g_shebang=true; -ztd::option_set gen_options() -{ - ztd::option_set ret; - ret.add( - ztd::option("\r [Help]"), - ztd::option('h', "help", false, "Display this help message"), - ztd::option("version", false, "Display version"), - 
ztd::option("help-link-commands", false, "Print help for linker commands"), - ztd::option("help-extend-fcts", false, "Print help for lxsh extension functions"), - ztd::option("\r [Output]"), - ztd::option('o', "output", true , "Output result script to file", "file"), - ztd::option('c', "stdout", false, "Output result script to stdout"), - ztd::option('e', "exec", false, "Directly execute script"), - ztd::option("no-shebang", false, "Don't output shebang"), - ztd::option('J', "json", false, "Output the json structure"), - ztd::option("\r [Processing]"), - ztd::option('m', "minify", false, "Minify code without changing functionality"), - ztd::option('M', "minify-full", false, "Enable all minifying features: -m --minify-quotes --minify-var --minify-fct --remove-unused"), - ztd::option("minify-quotes", false, "Remove unnecessary quotes"), - ztd::option('C', "no-cd", false, "Don't cd when doing %include and %resolve"), - ztd::option('I', "no-include", false, "Don't resolve %include commands"), - ztd::option('R', "no-resolve", false, "Don't resolve %resolve commands"), - ztd::option("no-extend", false, "Don't add lxsh extension functions"), - ztd::option("debashify", false, "Attempt to turn a bash-specific script into a POSIX shell script"), - ztd::option("\r [var/fct processing]"), - ztd::option("minify-var", false, "Minify variable names"), - ztd::option("minify-fct", false, "Minify function names"), - ztd::option("exclude-var", true, "List of matching regex to ignore for variable processing", "list"), - ztd::option("exclude-fct", true, "List of matching regex to ignore for function processing", "list"), - ztd::option("no-exclude-reserved",false, "Don't exclude reserved variables"), - ztd::option("list-var", false, "List all variables set and invoked in the script"), - ztd::option("list-var-def", false, "List all variables set in the script"), - ztd::option("list-var-call", false, "List all variables invoked in the script"), - ztd::option("list-fct", false, "List all 
functions defined in the script"), - ztd::option("list-cmd", false, "List all commands invoked in the script"), - ztd::option("remove-unused", false, "Remove unused functions and variables"), - ztd::option("unset-var", false, "Add 'unset' to all vars at the start of the script to avoid environment interference") - ); - return ret; -} - void get_opts() { g_cd=!options['C'].activated; @@ -102,9 +100,8 @@ ztd::option_set create_resolve_opts() void print_help(const char* arg0) { printf("%s [options] [arg...]\n", arg0); - printf("Link extended shell\n"); - printf("Include files and resolve commands on build time\n"); - printf("See --help-commands for help on linker commands\n"); + printf("Extended shell linker\n"); + printf("Include files, resolve commands on build time, process and minify shell code\n"); printf("\n"); printf("Options:\n"); options.print_help(4,25); diff --git a/src/parse.cpp b/src/parse.cpp index 4d58df4..92e070d 100644 --- a/src/parse.cpp +++ b/src/parse.cpp @@ -11,8 +11,6 @@ #define ORIGIN_NONE "" -bool g_bash=false; - // macro // constants @@ -27,7 +25,33 @@ const std::vector out_reserved_words = { "then", "else", "fi", "esa // stuff -std::string g_expecting; +std::string unexpected_token(char c) +{ + std::string print; + print += c; + if(c == '\n') + print="\\n"; + return "Unexpected token '"+print+"'"; +} + +std::string unexpected_token(std::string const& s) +{ + return "Unexpected token '"+s+"'"; +} + +void parse_error(std::string const& message, parse_context& ctx) +{ + printFormatError(format_error(message, ctx)); + ctx.has_errored=true; +} + +void parse_error(std::string const& message, parse_context& ctx, uint64_t i) +{ + parse_context newctx = ctx; + newctx.i = i; + printFormatError(format_error(message, newctx)); + ctx.has_errored=true; +} std::string expecting(std::string const& word) { @@ -37,6 +61,14 @@ std::string expecting(std::string const& word) return ""; } +std::string expecting(const char* word) +{ + if(word == NULL) + return 
expecting(std::string()); + else + return expecting(std::string(word)); +} + // basic char utils bool has_common_char(const char* str1, const char* str2) @@ -63,29 +95,44 @@ bool valid_name(std::string const& str) // string utils -bool word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set) +parse_context make_context(std::string const& in, std::string const& filename, bool bash) { - uint32_t wordsize=strlen(word); - if(wordsize > size-start) - return false; - if(strncmp(word, in+start, wordsize) == 0) - { - if(end_set==NULL) - return true; - // end set - if(wordsize < size-start) - return is_in(in[start+wordsize], end_set); - } - return false; + parse_context ctx = { .data=in.c_str(), .size=in.size(), .filename=filename.c_str(), .bash=bash}; + return ctx; } -std::pair get_word(const char* in, uint32_t size, uint32_t start, const char* end_set) +parse_context make_context(parse_context ctx, std::string const& in, std::string const& filename, bool bash) { - uint32_t i=start; - while(i parse_var(const char* in, uint32_t size, uint32_t start, bool specialvars, bool array) +uint32_t word_eq(const char* word, const char* in, uint32_t size, uint32_t start, const char* end_set) { uint32_t i=start; + uint32_t wordsize=strlen(word); + if(wordsize > size-i) + return false; + if(strncmp(word, in+i, wordsize) == 0) + { + if(end_set==NULL) + return true; + // end set + if(wordsize < size-i) + return is_in(in[i+wordsize], end_set); + } + return false; +} + +std::pair get_word(parse_context ctx, const char* end_set) +{ + uint32_t start=ctx.i; + while(ctx.i parse_var(parse_context ctx, bool specialvars, bool array) +{ variable* ret=nullptr; std::string varname; + uint32_t start=ctx.i; // special vars - if(specialvars && (is_in(in[i], SPECIAL_VARS) || (in[i]>='0' && in[i]<='1')) ) + if(specialvars && (is_in(ctx[ctx.i], SPECIAL_VARS) || (ctx[ctx.i]>='0' && ctx[ctx.i]<='9')) ) { - varname=in[i]; - i++; + varname=ctx[ctx.i]; + ctx.i++; } else // 
varname { - while(iindex=pp.first; - i = pp.second; - if(in[i] != ']') - throw PARSE_ERROR( "Expecting ']'", i ); - i++; + ctx = pp.second; + if(ctx[ctx.i] != ']') + { + parse_error( "Expecting ']'", ctx ); + return std::make_pair(ret, ctx); + } + ctx.i++; } } - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair get_operator(const char* in, uint32_t size, uint32_t start) +std::pair get_operator(parse_context ctx) { - uint32_t i=start; std::string ret; + uint32_t start=ctx.i; - while(!is_alphanum(in[i]) && !is_in(in[i], SEPARATORS) && in[i]!=')' ) - i++; + while(!is_alphanum(ctx[ctx.i]) && !is_in(ctx[ctx.i], SEPARATORS) && ctx[ctx.i]!=')' ) + ctx.i++; - ret = std::string(in+start, i-start); + ret = std::string(ctx.data+start, ctx.i-start); - return std::make_pair(ret, i); + return std::make_pair(ret, ctx.i); } +//** HERE **// + // parse an arithmetic // ends at )) // temporary, to improve -std::pair parse_arithmetic(const char* in, uint32_t size, uint32_t start) +std::pair parse_arithmetic(parse_context ctx) { arithmetic* ret = nullptr; - uint32_t i=start; -#ifndef NO_PARSE_CATCH - try + ctx.i = skip_chars(ctx, SEPARATORS); + if(ctx.i>ctx.size || ctx[ctx.i] == ')') { -#endif - i = skip_chars(in, size, i, SEPARATORS); - if(i>size || in[i] == ')') - throw PARSE_ERROR( "Unexpected end of arithmetic", i ); + parse_error( "Unexpected end of arithmetic", ctx ); + return std::make_pair(ret, ctx); + } - auto po = get_operator(in, size, i); - if(is_among(po.first, arithmetic_precedence_operators)) + auto po = get_operator(ctx); + if(is_among(po.first, arithmetic_precedence_operators)) + { + ctx.i = po.second; + auto pa = parse_arithmetic(ctx); + ret = new operation_arithmetic(po.first, pa.first, nullptr, true); + ctx=pa.second; + } + else + { + variable_arithmetic* ttvar=nullptr; // for categorizing definitions + if(ctx[ctx.i]=='-' || is_num(ctx[ctx.i])) { - auto pa = parse_arithmetic(in, size, po.second); - ret = new 
operation_arithmetic(po.first, pa.first, nullptr, true); - i=pa.second; + uint32_t j=ctx.i; + if(ctx[ctx.i]=='-') + ctx.i++; + while(is_num(ctx[ctx.i])) + ctx.i++; + ret = new number_arithmetic( std::string(ctx.data+j, ctx.i-j) ); + } + else if(word_eq("$(", ctx)) + { + ctx.i+=2; + auto ps = parse_subshell(ctx); + ret = new subshell_arithmetic(ps.first); + ctx=ps.second; + } + else if(word_eq("${", ctx)) + { + ctx.i+=2; + auto pm = parse_manipulation(ctx); + ret = new variable_arithmetic(pm.first); + ctx=pm.second; + } + else if(ctx[ctx.i] == '(') + { + ctx.i++; + auto pa = parse_arithmetic(ctx); + ret = pa.first; + ctx = pa.second; + ctx.i++; } else { - variable_arithmetic* ttvar=nullptr; // for categorizing definitions - if(in[i]=='-' || is_num(in[i])) + bool specialvars=false; + if(ctx[ctx.i] == '$') { - uint32_t j=i; - if(in[i]=='-') - i++; - while(is_num(in[i])) - i++; - ret = new number_arithmetic( std::string(in+j, i-j) ); + specialvars=true; + ctx.i++; } - else if(word_eq("$(", in, size, i)) - { - auto ps = parse_subshell(in, size, i+2); - ret = new subshell_arithmetic(ps.first); - i=ps.second; - } - else if(word_eq("${", in, size, i)) - { - auto pm = parse_manipulation(in, size, i+2); - ret = new variable_arithmetic(pm.first); - i=pm.second; - } - else if(in[i] == '(') - { - auto pa = parse_arithmetic(in, size, i+1); - ret = pa.first; - i = pa.second+1; - } - else - { - bool specialvars=false; - if(in[i] == '$') - { - specialvars=true; - i++; - } - auto pp = parse_var(in, size, i, specialvars, true); - ttvar = new variable_arithmetic(pp.first); - ret = ttvar; - i=pp.second; - } - - i = skip_chars(in, size, i, SEPARATORS); - auto po = get_operator(in, size, i); - if(po.first != "") - { - if(!is_among(po.first, arithmetic_operators)) - throw PARSE_ERROR( "Unknown arithmetic operator: "+po.first, i); - arithmetic* val1 = ret; - auto pa = parse_arithmetic(in, size, po.second); - arithmetic* val2 = pa.first; - i = pa.second; - ret = new 
operation_arithmetic(po.first, val1, val2); - i = skip_chars(in, size, i, SEPARATORS); - } - - if(po.first == "=" && ttvar!=nullptr) // categorize as var definition - ttvar->var->definition=true; - - if(i >= size) - throw PARSE_ERROR( "Unexpected end of file, expecting '))'", i ); - if(in[i] != ')') - throw PARSE_ERROR( "Unexpected token, expecting ')'", i); + auto pp = parse_var(ctx, specialvars, true); + ttvar = new variable_arithmetic(pp.first); + ret = ttvar; + ctx=pp.second; } -#ifndef NO_PARSE_CATCH - } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); + ctx.i = skip_chars(ctx, SEPARATORS); + auto po = get_operator(ctx); + if(po.first != "") + { + if(!is_among(po.first, arithmetic_operators)) + { + parse_error( "Unknown arithmetic operator: "+po.first, ctx); + } + arithmetic* val1 = ret; + ctx.i=po.second; + auto pa = parse_arithmetic(ctx); + arithmetic* val2 = pa.first; + ctx = pa.second; + ret = new operation_arithmetic(po.first, val1, val2); + ctx.i = skip_chars(ctx, SEPARATORS); + } + + if(po.first == "=" && ttvar!=nullptr) // categorize as var definition + ttvar->var->definition=true; + + if(ctx.i >= ctx.size) + { + parse_error( "Unexpected end of file, expecting '))'", ctx ); + return std::make_pair(ret, ctx); + } + if(ctx[ctx.i] != ')') + { + parse_error( unexpected_token(ctx[ctx.i])+ ", expecting ')'", ctx); + return std::make_pair(ret, ctx); + } + } + + return std::make_pair(ret, ctx); } -std::pair parse_manipulation(const char* in, uint32_t size, uint32_t start) +std::pair parse_manipulation(parse_context ctx) { variable* ret = nullptr; - uint32_t i=start; arg* precede = nullptr; + uint32_t start=ctx.i; -#ifndef NO_PARSE_CATCH - try + + if(ctx[ctx.i] == '#' || ctx[ctx.i] == '!') { -#endif - ; - if(in[i] == '#' || in[i] == '!') + if(!ctx.bash && ctx[ctx.i] == '!') { - if(!g_bash && in[i] == '!') - throw PARSE_ERROR("bash specific: '${!}'", i); - std::string t; - t+=in[i]; - precede = new arg( t ); 
- i++; + parse_error("bash specific: '${!}'", ctx); + return std::make_pair(ret, ctx); } - - auto p=parse_var(in, size, i, true, true); - if(p.first == nullptr) - throw PARSE_ERROR( "Bad variable name", i ); - ret = p.first; - i = p.second; - - ret->is_manip=true; - if(precede != nullptr) - { - if(in[i] != '}') - throw PARSE_ERROR( "Incompatible operations", start ); - ret->manip = precede; - ret->precedence=true; - precede=nullptr; - } - else if(in[i] != '}') - { - auto pa = parse_arg(in, size, i, "}", NULL, false); - ret->manip=pa.first; - i = pa.second; - } - i++; - -#ifndef NO_PARSE_CATCH + std::string t; + t+=ctx[ctx.i]; + precede = new arg( t ); + ctx.i++; } - catch(ztd::format_error& e) + + auto p=parse_var(ctx, true, true); + if(p.first == nullptr) { - if(ret != nullptr) delete ret; - throw e; + parse_error( "Bad variable name", ctx ); + return std::make_pair(ret, ctx); } -#endif + ret = p.first; + ctx = p.second; - return std::make_pair(ret, i); + ret->is_manip=true; + if(precede != nullptr) + { + if(ctx[ctx.i] != '}') + { + parse_error( "Incompatible operations", ctx, start ); + return std::make_pair(ret, ctx); + } + ret->manip = precede; + ret->precedence=true; + precede=nullptr; + } + else if(ctx[ctx.i] != '}') + { + auto pa = parse_arg(ctx, "}", NULL, false); + ret->manip=pa.first; + ctx = pa.second; + } + ctx.i++; + + return std::make_pair(ret, ctx); } -void do_one_subarg_step(arg* ret, const char* in, uint32_t size, uint32_t& i, uint32_t& j, bool is_quoted) +parse_context do_one_subarg_step(arg* ret, parse_context ctx, uint32_t& j, bool is_quoted) { - if( in[i] == '`' ) + if( ctx[ctx.i] == '`' ) { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); - i++; - uint32_t k=skip_until(in, size, i, "`"); - if(k>=size) - throw PARSE_ERROR("Expecting '`'", i-1); - if(in[k-1] == '\\' && in[k-2] != '\\') - throw PARSE_ERROR("Escaping backticks is not 
supported", k); + ctx.i++; + uint32_t k=skip_until(ctx, "`"); + if(k>=ctx.size) + { + parse_error("Expecting '`'", ctx, ctx.i-1); + return ctx; + } + if(ctx[k-1] == '\\' && ctx[k-2] != '\\') + { + parse_error("Escaping backticks is not supported", ctx, k); + return make_context(ctx, k); + } // get subshell - auto r=parse_list_until(in, k, i, 0); - ret->add(new subshell_subarg(new subshell(r.first), is_quoted)); - j = i = r.second+1; + parse_context newct = ctx; + ctx.size=k; + auto r=parse_list_until(newct); + ret->add(new subshell_subarg(new subshell(std::get<0>(r)), is_quoted)); + ctx = std::get<1>(r); + ctx.i++; + j = ctx.i; } - else if( word_eq("$((", in, size, i) ) // arithmetic operation + else if( word_eq("$((", ctx) ) // arithmetic operation { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // get arithmetic - auto r=parse_arithmetic(in, size, i+3); + ctx.i+=3; + auto r=parse_arithmetic(ctx); arithmetic_subarg* tt = new arithmetic_subarg(r.first); tt->quoted=is_quoted; ret->add(tt); - i = r.second; - if(!word_eq("))", in, size, i)) - throw PARSE_ERROR( "Unexpected token ')', expecting '))'", i); - i+=2; - j=i; + ctx = r.second; + if(ctx.i >= ctx.size) + return ctx; + if(!word_eq("))", ctx)) + { + parse_error( "Unexpected token ')', expecting '))'", ctx); + return ctx+1; + } + ctx.i+=2; + j=ctx.i; } - else if( word_eq("$(", in, size, i) ) // substitution + else if( word_eq("$(", ctx) ) // substitution { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // get subshell - auto r=parse_subshell(in, size, i+2); + ctx.i+=2; + auto r=parse_subshell(ctx); ret->add(new subshell_subarg(r.first, is_quoted)); - j = i = r.second; + ctx = r.second; + j = ctx.i; } - else if( word_eq("${", in, size, i) ) // variable manipulation + else if( word_eq("${", ctx) ) // 
variable manipulation { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // get manipulation - auto r=parse_manipulation(in, size, i+2); + ctx.i+=2; + auto r=parse_manipulation(ctx); ret->add(new variable_subarg(r.first, is_quoted)); - j = i = r.second; + ctx = r.second; + j = ctx.i; } - else if( in[i] == '$' ) + else if( ctx[ctx.i] == '$' ) { - auto r=parse_var(in, size, i+1); + parse_context newct=ctx; + newct.i++; + auto r=parse_var(newct); if(r.first !=nullptr) { // add previous subarg - std::string tmpstr=std::string(in+j, i-j); + std::string tmpstr=std::string(ctx.data+j, ctx.i-j); if(tmpstr!="") ret->add(tmpstr); // add var ret->add(new variable_subarg(r.first, is_quoted)); - j = i = r.second; + ctx = r.second; + j = ctx.i; } else - i++; + ctx.i++; } else - i++; + ctx.i++; + return ctx; } // parse one argument // must start at a read char // ends at either " \t|&;\n()" -std::pair parse_arg(const char* in, uint32_t size, uint32_t start, const char* end, const char* unexpected, bool doquote) +std::pair parse_arg(parse_context ctx, const char* end, const char* unexpected, bool doquote) { arg* ret = new arg; // j : start of subarg , q = start of quote - uint32_t i=start,j=start,q=start; + uint32_t j=ctx.i,q=ctx.i; -#ifndef NO_PARSE_CATCH - try + if(unexpected != NULL && is_in(ctx[ctx.i], unexpected)) { -#endif - ; - if(unexpected != NULL && is_in(in[i], unexpected)) - throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); + parse_error( unexpected_token(ctx[ctx.i]) , ctx); + } - while(i") && ctx[ctx.i+1]=='&') // special case for <& and >& { - if(i+1") && in[i+1]=='&') // special case for <& and >& + ctx.i += 2; + } + else if(doquote && ctx[ctx.i]=='\\') // backslash: don't check next char + { + ctx.i++; + if(ctx.i>=ctx.size) + break; + if(ctx[ctx.i] == '\n') // \ on \n : skip this char { - i+=2; - } - else if(doquote && in[i]=='\\') // backslash: 
don't check next char - { - i++; - if(i>=size) - break; - if(in[i] == '\n') // \ on \n : skip this char - { - std::string tmpstr=std::string(in+j, i-1-j); - if(tmpstr!="") - ret->add(tmpstr); - i++; - j=i; - } - else - i++; - } - else if(doquote && in[i] == '"') // start double quote - { - q=i; - i++; - while(in[i] != '"') // while inside quoted string - { - if(in[i] == '\\') // backslash: don't check next char - { - i+=2; - } - else - do_one_subarg_step(ret, in, size, i, j, true); - - if(i>=size) - throw PARSE_ERROR("Unterminated double quote", q); - } - i++; - } - else if(doquote && in[i] == '\'') // start single quote - { - q=i; - i++; - while(i=size) - throw PARSE_ERROR("Unterminated single quote", q); - i++; + std::string tmpstr=std::string(ctx.data+j, ctx.i-1-j); + if(tmpstr!="") + ret->add(tmpstr); + ctx.i++; + j=ctx.i; } else - do_one_subarg_step(ret, in, size, i, j, false); + ctx.i++; } + else if(doquote && ctx[ctx.i] == '"') // start double quote + { + q=ctx.i; + ctx.i++; + while(ctx[ctx.i] != '"') // while inside quoted string + { + if(ctx[ctx.i] == '\\') // backslash: don't check next char + { + ctx.i+=2; + } + else + ctx = do_one_subarg_step(ret, ctx, j, true); - // add string subarg - std::string val=std::string(in+j, i-j); - if(val != "") - ret->add(val); - -#ifndef NO_PARSE_CATCH + if(ctx.i>=ctx.size) + { + parse_error("Unterminated double quote", ctx, q); + return std::make_pair(ret, ctx); + } + } + ctx.i++; + } + else if(doquote && ctx[ctx.i] == '\'') // start single quote + { + q=ctx.i; + ctx.i++; + while(ctx.i=ctx.size) + { + parse_error("Unterminated single quote", ctx, q); + return std::make_pair(ret, ctx); + } + ctx.i++; + } + else + ctx = do_one_subarg_step(ret, ctx, j, false); } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); + // add string subarg + std::string val=std::string(ctx.data+j, ctx.i-j); + if(val != "") + ret->add(val); + + return std::make_pair(ret, ctx); } -std::pair 
parse_redirect(const char* in, uint32_t size, uint32_t start) +parse_context parse_heredocument(parse_context ctx) { - uint32_t i=start; + if(ctx.here_document == nullptr) + return ctx; + uint32_t j=ctx.i; + char* tc=NULL; + std::string delimitor=ctx.here_delimitor; + tc = (char*) strstr(ctx.data+ctx.i, std::string("\n"+delimitor+"\n").c_str()); // find delimitor + if(tc!=NULL) // delimitor was found + { + ctx.i = (tc-ctx.data)+delimitor.size()+1; + } + else + { + ctx.i = ctx.size; + } + // std::string tmpparse=std::string(ctx.data+j, ctx.i-j); + auto pval = parse_arg({ .data=ctx.data, .size=ctx.i, .i=j, .bash=ctx.bash} , NULL); + ctx.here_document->here_document = pval.first; + + // + ctx.here_document=nullptr; + free(ctx.here_delimitor); + ctx.here_delimitor=NULL; + + return ctx; +} + +std::pair parse_redirect(parse_context ctx) +{ bool is_redirect=false; bool needs_arg=false; bool has_num_prefix=false; - if(is_num(in[i])) + uint32_t start=ctx.i; + + if(is_num(ctx[ctx.i])) { - i++; + ctx.i++; has_num_prefix=true; } - if( in[i] == '>' ) + if( ctx[ctx.i] == '>' ) { - i++; - if(i>size) - throw PARSE_ERROR("Unexpected end of file", i); - is_redirect = true; - if(i+1ctx.size) { - i+=2; + parse_error("Unexpected end of file", ctx); + return std::make_pair(nullptr, ctx); + } + is_redirect = true; + if(ctx.i+1& bash operator + else if(ctx[ctx.i] == '&') // >& bash operator { - if(!g_bash) - throw PARSE_ERROR("bash specific: '>&'", i); - i++; + if(!ctx.bash) + { + parse_error("bash specific: '>&'", ctx); + } + ctx.i++; needs_arg=true; } else { - if(in[i] == '>') - i++; + if(ctx[ctx.i] == '>') + ctx.i++; needs_arg=true; } } - else if( in[i] == '<' ) + else if( ctx[ctx.i] == '<' ) { if(has_num_prefix) - throw PARSE_ERROR("Invalid input redirection", i-1); - i++; - if(i>size) - throw PARSE_ERROR("Unexpected end of file", i); - if(in[i] == '<') { - i++; - if(ictx.size) + { + parse_error("Unexpected end of file", ctx); + return std::make_pair(nullptr, ctx); + } + if(ctx[ctx.i] 
== '<') + { + ctx.i++; + if(ctx.i", in, size, i) ) // &> bash operator + else if( word_eq("&>", ctx) ) // &> bash operator { - if(!g_bash) - throw PARSE_ERROR("bash specific: '&>'", i); - i+=2; - if(i') - i++; + if(!ctx.bash) + { + parse_error("bash specific: '&>'", ctx); + } + ctx.i+=2; + if(ctx.i') + ctx.i++; is_redirect=true; needs_arg=true; } @@ -566,553 +708,443 @@ std::pair parse_redirect(const char* in, uint32_t size, uin if(is_redirect) { redirect* ret=nullptr; -#ifndef NO_PARSE_CATCH - try + + ret = new redirect; + ret->op = std::string(ctx.data+start, ctx.i-start); + if(needs_arg) { -#endif - ret = new redirect; - ret->op = std::string(in+start, i-start); - if(needs_arg) + ctx.i = skip_chars(ctx, SPACES); + if(ret->op == "<<") { - i = skip_chars(in, size, i, SPACES); - if(ret->op == "<<") + if(ctx.here_document != nullptr) { - auto pa = parse_arg(in, size, i); - std::string delimitor = pa.first->string(); - delete pa.first; - pa.first = nullptr; - - if(delimitor == "") - throw PARSE_ERROR("Non-static or empty text input delimitor", i); - - if(delimitor.find('"') != std::string::npos || delimitor.find('\'') != std::string::npos || delimitor.find('\\') != std::string::npos) - { - delimitor = ztd::sh("echo "+delimitor); // shell resolve the delimitor - delimitor.pop_back(); // remove \n - } - - i = skip_chars(in, size, pa.second, SPACES); // skip spaces - - if(in[i] == '#') // skip comment - i = skip_until(in, size, i, "\n"); //skip to endline - if(in[i] != '\n') // has another arg - throw PARSE_ERROR("Additionnal argument after text input delimitor", i); - - i++; - uint32_t j=i; - char* tc=NULL; - tc = (char*) strstr(in+i, std::string("\n"+delimitor+"\n").c_str()); // find delimitor - if(tc!=NULL) // delimitor was found - { - i = (tc-in)+delimitor.size()+1; - } - else - { - i = size; - } - std::string tmpparse=std::string(in+j, i-j); - auto pval = parse_arg(tmpparse.c_str(), tmpparse.size(), 0, NULL); - ret->target = pval.first; - ret->target->insert(0, 
delimitor+"\n"); + parse_error("unsupported multiple here documents at the same time", ctx); + return std::make_pair(ret, ctx); } else + ctx.here_document=ret; + + auto pa = parse_arg(ctx); + std::string delimitor = pa.first->string(); + + if(delimitor == "") { - auto pa = parse_arg(in, size, i); - ret->target = pa.first; - i=pa.second; + parse_error("non-static or empty here document delimitor", ctx); } + + if(delimitor.find('"') != std::string::npos || delimitor.find('\'') != std::string::npos || delimitor.find('\\') != std::string::npos) + { + delimitor = ztd::sh("echo "+delimitor); // shell resolve the delimitor + delimitor.pop_back(); // remove \n + } + ret->target = pa.first; + ctx = pa.second; + // copy delimitor + ctx.here_delimitor = (char*) malloc(delimitor.length()+1); + strcpy(ctx.here_delimitor, delimitor.c_str()); + } + else + { + auto pa = parse_arg(ctx); + ret->target = pa.first; + ctx=pa.second; } -#ifndef NO_PARSE_CATCH } - catch(ztd::format_error& e) - { - if(ret!=nullptr) - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } else - return std::make_pair(nullptr, start); + { + ctx.i=start; + return std::make_pair(nullptr, ctx); + } } // parse one list of arguments (a command for instance) // must start at a read char // first char has to be read // ends at either &|;\n#() -std::pair parse_arglist(const char* in, uint32_t size, uint32_t start, bool hard_error, std::vector* redirs) +std::pair parse_arglist(parse_context ctx, bool hard_error, std::vector* redirs) { - uint32_t i=start; arglist* ret = nullptr; -#ifndef NO_PARSE_CATCH - try + if(word_eq("[[", ctx, ARG_END) ) // [[ bash specific parsing { -#endif - ; - if(word_eq("[[", in, size, i, ARG_END) ) // [[ bash specific parsing + if(!ctx.bash) { - if(!g_bash) - throw PARSE_ERROR("bash specific: '[['", i); - while(true) + parse_error("bash specific: '[['", ctx); + } + while(true) + { + if(ret == nullptr) + ret = new arglist; + auto 
pp=parse_arg(ctx, SEPARATORS, NULL); + ret->add(pp.first); + ctx = pp.second; + ctx.i = skip_chars(ctx, SEPARATORS); + if(word_eq("]]", ctx, ARG_END)) { - if(ret == nullptr) - ret = new arglist; - auto pp=parse_arg(in, size, i, SEPARATORS, NULL); - ret->add(pp.first); - i = pp.second; - i = skip_chars(in, size, i, SEPARATORS); - if(word_eq("]]", in, size, i, ARG_END)) + ret->add(new arg("]]")); + ctx.i+=2; + ctx.i = skip_chars(ctx, SPACES); + if( !is_in(ctx[ctx.i], ARGLIST_END) ) { - ret->add(new arg("]]")); - i = skip_chars(in, size, i+2, SPACES); - if( !is_in(in[i], ARGLIST_END) ) - throw PARSE_ERROR("Unexpected argument after ']]'", i); - break; + parse_error("Unexpected argument after ']]'", ctx); + ctx = parse_arglist(ctx).second; } - if(i>=size) - throw PARSE_ERROR( "Expecting ']]'", i); + break; + } + if(ctx.i>=ctx.size) + { + parse_error( "Expecting ']]'", ctx); + return std::make_pair(ret, ctx); } } - else if(is_in(in[i], SPECIAL_TOKENS) && !word_eq("&>", in, size, i)) + } + else if(is_in(ctx[ctx.i], SPECIAL_TOKENS) && !word_eq("&>", ctx)) + { + if(hard_error) { - if(hard_error) - throw PARSE_ERROR( strf("Unexpected token '%c'", in[i]) , i); - else - return std::make_pair(ret, i); + parse_error( unexpected_token(ctx[ctx.i]) , ctx); } else + return std::make_pair(ret, ctx); + } + // ** HERE ** + else + { + while(ctx.i') && ctx[ctx.i+1] == '(' ) // bash specific <() { - if(i+1 < size && (in[i] == '<' || in[i] == '>') && in[i+1] == '(' ) // bash specific <() + if(!ctx.bash) { - bool is_output = in[i] == '>'; - i+=2; - if(ret == nullptr) - ret = new arglist; - auto ps = parse_subshell(in, size, i); - ret->add(new arg(new procsub_subarg(is_output, ps.first))); - i=ps.second; + parse_error(strf("bash specific: %c()", ctx[ctx.i]), ctx); } - else if(redirs!=nullptr) + bool is_output = ctx[ctx.i] == '>'; + ctx.i+=2; + if(ret == nullptr) + ret = new arglist; + auto ps = parse_subshell(ctx); + ret->add(new arg(new procsub_subarg(is_output, ps.first))); + 
ctx=ps.second; + } + else if(redirs!=nullptr) + { + auto pr = parse_redirect(ctx); + if(pr.first != nullptr) { - auto pr = parse_redirect(in, size, i); - if(pr.first != nullptr) - { - redirs->push_back(pr.first); - i=pr.second; - } - else - goto argparse; + redirs->push_back(pr.first); + ctx=pr.second; } else - { - argparse: - if(ret == nullptr) - ret = new arglist; - auto pp=parse_arg(in, size, i); - ret->add(pp.first); - i = pp.second; - } - i = skip_chars(in, size, i, SPACES); - if(word_eq("&>", in, size, i)) - continue; // &> has to be managed in redirects - if(word_eq("|&", in, size, i)) - throw PARSE_ERROR("Unsupported '|&', use '2>&1 |' instead", i); - if(i>=size) - return std::make_pair(ret, i); - if( is_in(in[i], SPECIAL_TOKENS) ) - return std::make_pair(ret, i); + goto argparse; } - + else + { + argparse: + if(ret == nullptr) + ret = new arglist; + auto pp=parse_arg(ctx); + ret->add(pp.first); + ctx = pp.second; + } + ctx.i = skip_chars(ctx, SPACES); + if(word_eq("&>", ctx)) + continue; // &> has to be managed in redirects + if(word_eq("|&", ctx)) + { + parse_error("Unsupported '|&', use '2>&1 |' instead", ctx); + return std::make_pair(ret, ctx+1); + } + if(ctx.i>=ctx.size) + return std::make_pair(ret, ctx); + if( is_in(ctx[ctx.i], SPECIAL_TOKENS) ) + return std::make_pair(ret, ctx); } + } -#ifndef NO_PARSE_CATCH - } - catch(ztd::format_error& e) - { - if(ret != nullptr) - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse a pipeline // must start at a read char // separated by | // ends at either &;\n#) -std::pair parse_pipeline(const char* in, uint32_t size, uint32_t start) +std::pair parse_pipeline(parse_context ctx) { - uint32_t i=start; pipeline* ret = new pipeline; -#ifndef NO_PARSE_CATCH - try + if(ctx[ctx.i] == '!' 
&& ctx.i+1negated = true; - i=skip_chars(in, size, i+1, SPACES); - } - while(iadd(pp.first); - i = skip_chars(in, size, pp.second, SPACES); - if( i>=size || is_in(in[i], PIPELINE_END) || word_eq("||", in, size, i) ) - return std::make_pair(ret, i); - else if( in[i] != '|' ) - throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i] ), i); - i++; - } -#ifndef NO_PARSE_CATCH + ret->negated = true; + ctx.i++; + ctx.i=skip_chars(ctx, SPACES); } - catch(ztd::format_error& e) + while(ctx.iadd(pp.first); + ctx = pp.second; + ctx.i = skip_chars(ctx, SPACES); + if( ctx.i>=ctx.size || is_in(ctx[ctx.i], PIPELINE_END) || word_eq("||", ctx) ) + return std::make_pair(ret, ctx); + else if( ctx[ctx.i] != '|' ) + { + parse_error( unexpected_token(ctx[ctx.i] ), ctx); + return std::make_pair(ret, ctx); + } + ctx.i++; } -#endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse condition lists // must start at a read char // separated by && or || // ends at either ;\n)# -std::pair parse_condlist(const char* in, uint32_t size, uint32_t start) +std::pair parse_condlist(parse_context ctx) { - uint32_t i = skip_unread(in, size, start); condlist* ret = new condlist; + ctx.i = skip_unread(ctx); -#ifndef NO_PARSE_CATCH - try + bool optype=AND_OP; + while(ctx.iadd(pp.first, optype); + ctx = pp.second; + if(ctx.i>=ctx.size || is_in(ctx[ctx.i], CONTROL_END) || is_in(ctx[ctx.i], COMMAND_SEPARATOR)) // end here exactly: used for control later { - auto pp=parse_pipeline(in, size, i); - ret->add(pp.first, optype); - i = pp.second; - if(i>=size || is_in(in[i], CONTROL_END) || is_in(in[i], COMMAND_SEPARATOR)) // end here exactly: used for control later - { - return std::make_pair(ret, i); - } - else if( word_eq("&", in, size, i) && !word_eq("&&", in, size, i) ) // parallel: end one char after - { - ret->parallel=true; - i++; - return std::make_pair(ret, i); - } - else if( word_eq("&&", in, size, i) ) // and op - { - i += 2; - optype=AND_OP; - } - else if( word_eq("||", in, 
size, i) ) // or op - { - i += 2; - optype=OR_OP; - } - else - throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i]), i); - i = skip_unread(in, size, i); - if(i>=size) - throw PARSE_ERROR( "Unexpected end of file", i ); + return std::make_pair(ret, ctx); + } + else if( word_eq("&", ctx) && !word_eq("&&", ctx) ) // parallel: end one char after + { + ret->parallel=true; + ctx.i++; + return std::make_pair(ret, ctx); + } + else if( word_eq("&&", ctx) ) // and op + { + ctx.i += 2; + optype=AND_OP; + } + else if( word_eq("||", ctx) ) // or op + { + ctx.i += 2; + optype=OR_OP; + } + else + { + parse_error( unexpected_token(ctx[ctx.i]), ctx); + return std::make_pair(ret, ctx); + } + ctx.i = skip_unread(ctx); + if(ctx.i>=ctx.size) + { + parse_error( "Unexpected end of file", ctx ); + return std::make_pair(ret, ctx); } -#ifndef NO_PARSE_CATCH } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, char end_c, const char* expecting) +std::tuple parse_list_until(parse_context ctx, list_parse_options opts) { list* ret = new list; - uint32_t i=skip_unread(in, size, start); - -#ifndef NO_PARSE_CATCH - try - { -#endif - while(in[i] != end_c) - { - auto pp=parse_condlist(in, size, i); - ret->add(pp.first); - i=pp.second; - - if(i < size) - { - if(in[i] == end_c) // end char, stop here - break; - else if(in[i] == '#') - ; // skip here - else if(is_in(in[i], COMMAND_SEPARATOR)) - i++; // skip on next char - else if(is_in(in[i], CONTROL_END)) - throw PARSE_ERROR(strf("Unexpected token: '%c'", in[i]), i); - - i = skip_unread(in, size, i); - } - - if(i>=size) - { - if(end_c != 0) - { - if(expecting!=NULL) - throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); - else - throw PARSE_ERROR(strf("Expecting '%c'", end_c), start-1); - } - else - break; - } - } -#ifndef NO_PARSE_CATCH - } - catch(ztd::format_error& e) - { 
- delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); -} - -std::pair parse_list_until(const char* in, uint32_t size, uint32_t start, std::string const& end_word) -{ - list* ret = new list; - uint32_t i=skip_unread(in, size, start); - -#ifndef NO_PARSE_CATCH - try - { -#endif - std::string old_expect=g_expecting; - g_expecting=end_word; - while(true) - { - // check word - auto wp=get_word(in, size, i, ARG_END); - if(wp.first == end_word) - { - i=wp.second; - break; - } - // do a parse - auto pp=parse_condlist(in, size, i); - ret->add(pp.first); - i=pp.second; - if(i=size) - { - throw PARSE_ERROR(strf("Expecting '%s'", end_word.c_str()), start-1); - } - } - g_expecting=old_expect; -#ifndef NO_PARSE_CATCH - } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_pair(ret, i); -} - - -std::tuple parse_list_until(const char* in, uint32_t size, uint32_t start, std::vector const& end_words, const char* expecting) -{ - list* ret = new list; - uint32_t i=skip_unread(in, size, start);; + ctx.i=skip_unread(ctx); std::string found_end_word; -#ifndef NO_PARSE_CATCH - try + char& end_c = opts.end_char; + std::vector& end_words = opts.end_words; + + const char* old_expect=ctx.expecting; + + if(opts.expecting!=NULL) + ctx.expecting=opts.expecting; + else if(opts.word_mode) + ctx.expecting=end_words[0].c_str(); + else + ctx.expecting=std::string(&end_c, 1).c_str(); + + bool stop=false; + while(true) { -#endif - std::string old_expect=g_expecting; - g_expecting=end_words[0]; - bool stop=false; - while(true) + if(opts.word_mode) { // check words - auto wp=get_word(in, size, i, ARG_END); + auto wp=get_word(ctx, ARG_END); for(auto it: end_words) { - if(it == ";" && in[i] == ';') + if(it == ";" && ctx[ctx.i] == ';') { found_end_word=";"; - i++; + ctx.i++; stop=true; break; } if(wp.first == it) { found_end_word=it; - i=wp.second; + ctx.i=wp.second; stop=true; break; } } if(stop) break; - // do a parse - auto pp=parse_condlist(in, size, 
i); - ret->add(pp.first); - i=pp.second; - if(in[i] == '#') - ; // skip here - else if(is_in(in[i], COMMAND_SEPARATOR)) - i++; // skip on next - - i = skip_unread(in, size, i); - // word wasn't found - if(i>=size) - { - if(expecting!=NULL) - throw PARSE_ERROR(strf("Expecting '%s'", expecting), start-1); - else - throw PARSE_ERROR(strf("Expecting '%s'", end_words[0].c_str()), start-1); - } } - g_expecting=old_expect; -#ifndef NO_PARSE_CATCH + else if(ctx[ctx.i] == end_c) + { + break; + } + // do a parse + auto pp=parse_condlist(ctx); + ret->add(pp.first); + ctx=pp.second; + + if(!opts.word_mode && ctx[ctx.i] == end_c) + break; // reached end char: stop here + else if(ctx[ctx.i] == '\n') + { + if(ctx.here_document != nullptr) + ctx = parse_heredocument(ctx+1); + // do here document parse + } + else if(ctx[ctx.i] == '#') + ; // skip here + else if(is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + ; // skip on next + else if(is_in(ctx[ctx.i], CONTROL_END)) + { + // control end: unexpected + parse_error( unexpected_token(ctx[ctx.i]), ctx); + break; + } + + if(ctx.here_document != nullptr) + { + uint8_t do_twice=2; + // case of : cat << EOF ; + while(do_twice>0) + { + if(ctx[ctx.i] == '\n') + { + ctx = parse_heredocument(ctx+1); + break; + } + else if(ctx[ctx.i] == '#') + { + ctx.i = skip_until(ctx, "\n"); //skip to endline + ctx = parse_heredocument(ctx+1); + break; + } + skip_chars(ctx, SPACES); + do_twice--; + } + // case of : cat << EOF ; ; + if(do_twice==0 && is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + parse_error( unexpected_token(ctx[ctx.i]), ctx); + } + + if(is_in(ctx[ctx.i], COMMAND_SEPARATOR)) + ctx.i++; + + ctx.i = skip_unread(ctx); + + // word wasn't found + if(ctx.i>=ctx.size) + { + if(opts.word_mode || opts.end_char != 0) + { + parse_error(strf("Expecting '%s'", ctx.expecting), ctx); + return std::make_tuple(ret, ctx, ""); + } + else + break; + } } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_tuple(ret, i, found_end_word); + 
ctx.expecting=old_expect; + return std::make_tuple(ret, ctx, found_end_word); } // parse a subshell // must start right after the opening ( // ends at ) and nothing else -std::pair parse_subshell(const char* in, uint32_t size, uint32_t start) +std::pair parse_subshell(parse_context ctx) { - uint32_t i = skip_unread(in, size, start); subshell* ret = new subshell; + uint32_t start=ctx.i; + ctx.i = skip_unread(ctx); -#ifndef NO_PARSE_CATCH - try + auto pp=parse_list_until(ctx, {.end_char=')', .expecting=")"} ); + ret->lst=std::get<0>(pp); + ctx=std::get<1>(pp); + if(ret->lst->size()<=0) { -#endif - auto pp=parse_list_until(in, size, start, ')'); - ret->lst=pp.first; - i=pp.second; - if(ret->lst->size()<=0) - throw PARSE_ERROR("Subshell is empty", start-1); - i++; -#ifndef NO_PARSE_CATCH + parse_error("Subshell is empty", ctx, start-1); } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif - return std::make_pair(ret,i); + ctx.i++; + + return std::make_pair(ret,ctx); } // parse a brace block // must start right after the opening { // ends at } and nothing else -std::pair parse_brace(const char* in, uint32_t size, uint32_t start) +std::pair parse_brace(parse_context ctx) { - uint32_t i = skip_unread(in, size, start); brace* ret = new brace; + uint32_t start=ctx.i; + ctx.i = skip_unread(ctx); -#ifndef NO_PARSE_CATCH - try + auto pp=parse_list_until(ctx, {.end_char='}', .expecting="}"}); + ret->lst=std::get<0>(pp); + ctx=std::get<1>(pp); + if(ret->lst->size()<=0) { -#endif - auto pp=parse_list_until(in, size, start, '}'); - ret->lst=pp.first; - i=pp.second; - if(ret->lst->size()<=0) - throw PARSE_ERROR("Brace block is empty", start-1); - i++; -#ifndef NO_PARSE_CATCH + parse_error("Brace block is empty", ctx, start-1); + return std::make_pair(ret, ctx+1); } - catch(ztd::format_error& e) - { - delete ret; - throw e; - } -#endif + ctx.i++; - return std::make_pair(ret,i); + return std::make_pair(ret,ctx); } // parse a function // must start right after the 
() // then parses a brace block -std::pair parse_function(const char* in, uint32_t size, uint32_t start, const char* after) +std::pair parse_function(parse_context ctx, const char* after) { - uint32_t i=start; function* ret = new function; -#ifndef NO_PARSE_CATCH - try + ctx.i=skip_unread(ctx); + if(ctx[ctx.i] != '{') { -#endif - i=skip_unread(in, size, i); - if(in[i] != '{') - throw PARSE_ERROR( strf("Expecting { after %s", after) , i); - i++; - - auto pp=parse_list_until(in, size, i, '}'); - if(pp.first->size()<=0) - throw PARSE_ERROR("Function is empty", i); - - ret->lst=pp.first; - i=pp.second; - i++; -#ifndef NO_PARSE_CATCH + parse_error( strf("Expecting { after %s", after) , ctx); + return std::make_pair(ret, ctx); } - catch(ztd::format_error& e) + ctx.i++; + + auto pp=parse_list_until(ctx, {.end_char='}', .expecting="}"} ); + ret->lst=std::get<0>(pp); + if(ret->lst->size()<=0) { - delete ret; - throw e; + parse_error("Function is empty", ctx); + ctx.i=std::get<1>(pp).i+1; + return std::make_pair(ret, ctx); } -#endif - return std::make_pair(ret, i); + ctx=std::get<1>(pp); + ctx.i++; + + return std::make_pair(ret, ctx); } // parse only var assigns -uint32_t parse_cmd_varassigns(cmd* ret, const char* in, uint32_t size, uint32_t start, bool cmdassign=false, std::string const& cmd="") +parse_context parse_cmd_varassigns(cmd* ret, parse_context ctx, bool cmdassign=false, std::string const& cmd="") { - uint32_t i=start; bool forbid_assign=false; bool forbid_special=false; if(cmdassign && (cmd == "read" || cmd == "unset") ) @@ -1120,70 +1152,88 @@ uint32_t parse_cmd_varassigns(cmd* ret, const char* in, uint32_t size, uint32_t if(cmdassign && (forbid_special || cmd == "export") ) forbid_special=true; - while(idefinition=true; - if(vp.first != nullptr && vp.secondinsert(0,"("); ta->add(")"); - i=pp.second+1; + ctx = pp.second; + ctx.i++; } - else if( is_in(in[i], ARG_END) ) // no value : give empty value + else if( is_in(ctx[ctx.i], ARG_END) ) // no value : give empty 
value { ta = new arg; } else { - auto pp=parse_arg(in, size, i); + auto pp=parse_arg(ctx); ta=pp.first; - i=pp.second; + ctx=pp.second; } ta->insert(0, strop); ret->var_assigns.push_back(std::make_pair(vp.first, ta)); - i=skip_chars(in, size, i, SPACES); + ctx.i=skip_chars(ctx, SPACES); } else { if(cmdassign) { - if(vp.first != nullptr && is_in(in[vp.second], ARG_END) ) + if(vp.first != nullptr && is_in(newct[newct.i], ARG_END) ) { ret->var_assigns.push_back(std::make_pair(vp.first, nullptr)); - i=vp.second; + ctx=newct; } else { delete vp.first; - auto pp=parse_arg(in, size, i); + auto pp=parse_arg(ctx); ret->var_assigns.push_back(std::make_pair(nullptr, pp.first)); - i=pp.second; + ctx=pp.second; } - i=skip_chars(in, size, i, SPACES); + ctx.i=skip_chars(ctx, SPACES); } else { @@ -1193,451 +1243,462 @@ uint32_t parse_cmd_varassigns(cmd* ret, const char* in, uint32_t size, uint32_t } } } - return i; + return ctx; } // must start at read char -std::pair parse_cmd(const char* in, uint32_t size, uint32_t start) +std::pair parse_cmd(parse_context ctx) { cmd* ret = new cmd; - uint32_t i=start; + uint32_t start=ctx.i; -#ifndef NO_PARSE_CATCH - try + ctx = parse_cmd_varassigns(ret, ctx); + + auto wp=get_word(ctx, ARG_END); + if(is_in_vector(wp.first, posix_cmdvar) || is_in_vector(wp.first, bash_cmdvar)) { -#endif -; - i=parse_cmd_varassigns(ret, in, size, i); - - auto wp=get_word(in, size, i, ARG_END); - if(is_in_vector(wp.first, posix_cmdvar) || is_in_vector(wp.first, bash_cmdvar)) + if(!ctx.bash && is_in_vector(wp.first, bash_cmdvar)) { - if(!g_bash && is_in_vector(wp.first, bash_cmdvar)) - throw PARSE_ERROR("bash specific: "+wp.first, i); - if(ret->var_assigns.size()>0) - throw PARSE_ERROR("Unallowed preceding variables on "+wp.first, start); - - ret->args = new arglist; - ret->args->add(new arg(wp.first)); - ret->is_cmdvar=true; - i=skip_chars(in, size, wp.second, SPACES); - - i=parse_cmd_varassigns(ret, in, size, i, true, wp.first); + parse_error("bash specific: 
"+wp.first, ctx); + } + if(ret->var_assigns.size()>0) + { + parse_error("Unallowed preceding variables on "+wp.first, ctx, start); } - if(!is_in(in[i], SPECIAL_TOKENS)) - { - auto pp=parse_arglist(in, size, i, true, &ret->redirs); - ret->args = pp.first; - i = pp.second; - } - else if(ret->var_assigns.size() <= 0) - throw PARSE_ERROR( strf("Unexpected token: '%c'", in[i]), i ); + ret->args = new arglist; + ret->args->add(new arg(wp.first)); + ret->is_cmdvar=true; + ctx.i = wp.second; + ctx.i = skip_chars(ctx, SPACES); -#ifndef NO_PARSE_CATCH + ctx = parse_cmd_varassigns(ret, ctx, true, wp.first); } - catch(ztd::format_error& e) + + if(!is_in(ctx[ctx.i], SPECIAL_TOKENS)) { - delete ret; - throw e; + auto pp=parse_arglist(ctx, true, &ret->redirs); + ret->args = pp.first; + ctx = pp.second; + } + else if(ret->var_assigns.size() <= 0) + { + parse_error( unexpected_token(ctx[ctx.i]), ctx ); + ctx.i++; } -#endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // parse a case block // must start right after the case // ends at } and nothing else -std::pair parse_case(const char* in, uint32_t size, uint32_t start) +std::pair parse_case(parse_context ctx) { - uint32_t i=skip_chars(in, size, start, SPACES);; case_block* ret = new case_block; + ctx.i=skip_chars(ctx, SPACES); -#ifndef NO_PARSE_CATCH - try + // get the treated argument + auto pa = parse_arg(ctx); + ret->carg = pa.first; + ctx=pa.second; + ctx.i=skip_unread(ctx); + + // must be an 'in' + if(!word_eq("in", ctx, SEPARATORS)) { -#endif - // get the treated argument - auto pa = parse_arg(in, size, i); - ret->carg = pa.first; - i=skip_unread(in, size, pa.second); - - // must be an 'in' - if(!word_eq("in", in, size, i, SEPARATORS)) - { - std::string pp=get_word(in, size, i, SEPARATORS).first; - throw PARSE_ERROR( strf("Unexpected word: '%s', expecting 'in' after case", pp.c_str()), i); - } - - i=skip_unread(in, size, i+2); - - // parse all cases - while(icases.push_back( 
std::make_pair(std::vector(), nullptr) ); - // iterator to last element - auto cc = ret->cases.end()-1; - - // toto) - while(true) - { - pa = parse_arg(in, size, i); - cc->first.push_back(pa.first); - if(pa.first->size() <= 0) - throw PARSE_ERROR("Empty case value", i); - i=skip_unread(in, size, pa.second); - if(i>=size) - throw PARSE_ERROR("Unexpected end of file. Expecting 'esac'", i); - if(in[i] == ')') - break; - if(in[i] != '|' && is_in(in[i], SPECIAL_TOKENS)) - throw PARSE_ERROR( strf("Unexpected token '%c', expecting ')'", in[i]), i ); - i=skip_unread(in, size, i+1); - } - i++; - - // until ;; - auto tp = parse_list_until(in, size, i, {";", "esac"}, ";;"); - cc->second = std::get<0>(tp); - i = std::get<1>(tp); - std::string word = std::get<2>(tp); - if(word == "esac") - { - i -= 4; - break; - } - if(i >= size) - throw PARSE_ERROR("Expecting ';;'", i); - if(in[i-1] != ';') - throw PARSE_ERROR("Unexpected token: ';'", i); - - i=skip_unread(in, size, i+1); - } - - // ended before finding esac - if(i>=size) - throw PARSE_ERROR("Expecting 'esac'", i); - i+=4; -#ifndef NO_PARSE_CATCH + std::string word=get_word(ctx, SEPARATORS).first; + parse_error( strf("Unexpected word: '%s', expecting 'in' after case", word.c_str()), ctx); } - catch(ztd::format_error& e) + ctx.i+=2; + ctx.i=skip_unread(ctx); + + // parse all cases + while(ctx.icases.push_back( std::make_pair(std::vector(), nullptr) ); + // iterator to last element + auto cc = ret->cases.end()-1; - return std::make_pair(ret, i); -} - -std::pair parse_if(const char* in, uint32_t size, uint32_t start) -{ - if_block* ret = new if_block; - uint32_t i=start; - -#ifndef NO_PARSE_CATCH - try - { -#endif + // toto) while(true) { - std::string word; - - ret->blocks.push_back(std::make_pair(nullptr, nullptr)); - auto ll = ret->blocks.end()-1; - - auto pp=parse_list_until(in, size, i, "then"); - ll->first = pp.first; - i = pp.second; - if(ll->first->size()<=0) - throw PARSE_ERROR("Condition is empty", i); - - auto 
tp=parse_list_until(in, size, i, {"fi", "elif", "else"}); - ll->second = std::get<0>(tp); - i = std::get<1>(tp); - word = std::get<2>(tp); - if(std::get<0>(tp)->size() <= 0) - throw PARSE_ERROR("if block is empty", i); - - if(word == "fi") - break; - if(word == "else") + pa = parse_arg(ctx); + cc->first.push_back(pa.first); + ctx = pa.second; + if(pa.first->size() <= 0) { - auto pp=parse_list_until(in, size, i, "fi"); - if(pp.first->size()<=0) - throw PARSE_ERROR("else block is empty", i); - ret->else_lst=pp.first; - i=pp.second; - break; + parse_error("Empty case value", ctx); } - + ctx.i = skip_unread(ctx); + if(ctx.i>=ctx.size) + { + parse_error("Unexpected end of file. Expecting 'esac'", ctx); + return std::make_pair(ret, ctx); + } + if(ctx[ctx.i] == ')') + break; + if(ctx[ctx.i] != '|' && is_in(ctx[ctx.i], SPECIAL_TOKENS)) + { + parse_error( unexpected_token(ctx[ctx.i])+", expecting ')'", ctx ); + } + // | + ctx.i++; + ctx.i=skip_unread(ctx); } + ctx.i++; -#ifndef NO_PARSE_CATCH + // until ;; + auto tp = parse_list_until(ctx, { .word_mode=true, .end_words={";", "esac"}, .expecting=";;" }); + cc->second = std::get<0>(tp); + ctx = std::get<1>(tp); + std::string word = std::get<2>(tp); + if(word == "esac") + { + ctx.i -= 4; + break; + } + if(ctx.i >= ctx.size) + { + parse_error("Expecting ';;'", ctx); + } + if(ctx[ctx.i-1] != ';') + { + parse_error(strf("Unexpected token '%c'", ctx[ctx.i-1]), ctx); + } + if(ctx[ctx.i] == ';') + ctx.i++; + ctx.i=skip_unread(ctx); } - catch(ztd::format_error& e) + + // ended before finding esac + if(ctx.i>=ctx.size) { - delete ret; - throw e; + parse_error("Expecting 'esac'", ctx); + return std::make_pair(ret, ctx); } -#endif + ctx.i+=4; - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } -std::pair parse_for(const char* in, uint32_t size, uint32_t start) +std::pair parse_if(parse_context ctx) +{ + if_block* ret = new if_block; + + while(true) + { + std::string word; + parse_context oldctx = ctx; + + 
ret->blocks.push_back(std::make_pair(nullptr, nullptr)); + auto ll = ret->blocks.end()-1; + + auto pp=parse_list_until(ctx, {.word_mode=true, .end_words={"then"}}); + ll->first = std::get<0>(pp); + ctx = std::get<1>(pp); + if(ll->first->size()<=0) + { + parse_error("Condition is empty", oldctx); + ctx.has_errored=true; + } + + auto tp=parse_list_until(ctx, {.word_mode=true, .end_words={"fi", "elif", "else"}} ); + ll->second = std::get<0>(tp); + parse_context newctx = std::get<1>(tp); + word = std::get<2>(tp); + if(ll->second->size() <= 0) + { + parse_error("if block is empty", ctx); + newctx.has_errored=true; + } + ctx = newctx; + + if(word == "fi") + break; + if(word == "else") + { + auto pp=parse_list_until(ctx, {.word_mode=true, .end_words={"fi"}}); + ret->else_lst=std::get<0>(pp); + if(ret->else_lst->size()<=0) + { + parse_error("else block is empty", ctx); + ctx=std::get<1>(pp); + ctx.has_errored=true; + } + else + ctx=std::get<1>(pp); + + break; + } + + } + + return std::make_pair(ret, ctx); +} + +std::pair parse_for(parse_context ctx) { for_block* ret = new for_block; - uint32_t i=skip_chars(in, size, start, SPACES); + ctx.i = skip_chars(ctx, SPACES); -#ifndef NO_PARSE_CATCH - try + auto wp = get_word(ctx, ARG_END); + + if(!valid_name(wp.first)) { -#endif - auto wp = get_word(in, size, i, ARG_END); - - if(!valid_name(wp.first)) - throw PARSE_ERROR( strf("Bad identifier in for clause: '%s'", wp.first.c_str()), i ); - ret->var = new variable(wp.first, nullptr, true); - i=skip_chars(in, size, wp.second, SPACES); - - // in - wp = get_word(in, size, i, ARG_END); - if(wp.first == "in") - { - i=skip_chars(in, size, wp.second, SPACES); - auto pp = parse_arglist(in, size, i, false); - ret->iter = pp.first; - i = pp.second; - } - else if(wp.first != "") - throw PARSE_ERROR( "Expecting 'in' after for", i ); - - // end of arg list - if(!is_in(in[i], "\n;#")) - throw PARSE_ERROR( strf("Unexpected token '%c', expecting '\\n' or ';'", in[i]), i ); - if(in[i] == ';') - i++; 
- i=skip_unread(in, size, i); - - // do - wp = get_word(in, size, i, ARG_END); - if(wp.first != "do") - throw PARSE_ERROR( "Expecting 'do', after for", i); - i=skip_unread(in, size, wp.second); - - // ops - auto lp = parse_list_until(in, size, i, "done"); - ret->ops=lp.first; - i=lp.second; -#ifndef NO_PARSE_CATCH + parse_error( strf("Bad variable name in for clause: '%s'", wp.first.c_str()), ctx ); } - catch(ztd::format_error& e) + ret->var = new variable(wp.first, nullptr, true); + ctx.i = wp.second; + ctx.i=skip_chars(ctx, SPACES); + + // in + wp = get_word(ctx, ARG_END); + if(wp.first == "in") { - delete ret; - throw e; + ctx.i=wp.second; + ctx.i=skip_chars(ctx, SPACES); + auto pp = parse_arglist(ctx, false); + ret->iter = pp.first; + ctx = pp.second; + } + else if(wp.first != "") + { + parse_error( "Expecting 'in' after for", ctx ); + ctx.i=wp.second; + ctx.i=skip_chars(ctx, SPACES); } -#endif - return std::make_pair(ret, i); + // end of arg list + if(!is_in(ctx[ctx.i], "\n;#")) + { + parse_error( unexpected_token(ctx[ctx.i])+", expecting newline, ';' or 'in'", ctx ); + while(!is_in(ctx[ctx.i], "\n;#")) + ctx.i++; + } + if(ctx[ctx.i] == ';') + ctx.i++; + ctx.i=skip_unread(ctx); + + // do + wp = get_word(ctx, ARG_END); + if(wp.first != "do") + { + parse_error( "Expecting 'do', after for", ctx); + } + else + { + ctx.i = wp.second; + ctx.i = skip_unread(ctx); + } + + // ops + auto lp = parse_list_until(ctx, {.word_mode=true, .end_words={"done"}} ); + ret->ops=std::get<0>(lp); + ctx=std::get<1>(lp); + + return std::make_pair(ret, ctx); } -std::pair parse_while(const char* in, uint32_t size, uint32_t start) +std::pair parse_while(parse_context ctx) { while_block* ret = new while_block; - uint32_t i=start; -#ifndef NO_PARSE_CATCH - try + // cond + parse_context oldctx = ctx; + auto pp=parse_list_until(ctx, {.word_mode=true, .end_words={"do"}}); + + ret->cond = std::get<0>(pp); + ctx = std::get<1>(pp); + + if(ret->cond->size() <= 0) { -#endif - // cond - auto 
pp=parse_list_until(in, size, i, "do"); - ret->cond = pp.first; - i = pp.second; - if(ret->cond->size() <= 0) - throw PARSE_ERROR("condition is empty", i); - - - // ops - auto lp = parse_list_until(in, size, i, "done"); - ret->ops=lp.first; - i = lp.second; - if(ret->ops->size() <= 0) - throw PARSE_ERROR("while is empty", i); -#ifndef NO_PARSE_CATCH + parse_error("condition is empty", oldctx); + ctx.has_errored=true; } - catch(ztd::format_error& e) + + // ops + oldctx = ctx; + auto lp = parse_list_until(ctx, {.word_mode=true, .end_words={"done"}} ); + ret->ops=std::get<0>(lp); + ctx = std::get<1>(lp); + if(ret->ops->size() <= 0) { - delete ret; - throw e; + parse_error("while is empty", oldctx); + ctx.has_errored=true; } -#endif - return std::make_pair(ret, i); + return std::make_pair(ret, ctx); } // detect if brace, subshell, case or other -std::pair parse_block(const char* in, uint32_t size, uint32_t start) +std::pair parse_block(parse_context ctx) { - uint32_t i = skip_chars(in, size, start, SEPARATORS); + ctx.i = skip_chars(ctx, SEPARATORS); block* ret = nullptr; -#ifndef NO_PARSE_CATCH - try + if(ctx.i>=ctx.size) { -#endif - if(i>=size) - throw PARSE_ERROR("Unexpected end of file", i); - if( in[i] == '(' ) //subshell + parse_error("Unexpected end of file", ctx); + return std::make_pair(ret, ctx); + } + if( ctx.data[ctx.i] == '(' ) //subshell + { + ctx.i++; + auto pp = parse_subshell(ctx); + ret = pp.first; + ctx = pp.second; + } + else + { + auto wp=get_word(ctx, BLOCK_TOKEN_END); + std::string& word=wp.first; + parse_context newct=ctx; + newct.i=wp.second; + // reserved words + if( word == "{" ) // brace block { - auto pp = parse_subshell(in, size, i+1); + auto pp = parse_brace(newct); ret = pp.first; - i = pp.second; + ctx = pp.second; } - else + else if(word == "case") // case { - auto wp=get_word(in, size, i, BLOCK_TOKEN_END); - std::string word=wp.first; - // reserved words - if( word == "{" ) // brace block - { - auto pp = parse_brace(in, size, 
wp.second); - ret = pp.first; - i = pp.second; - } - else if(word == "case") // case - { - auto pp = parse_case(in, size, wp.second); - ret = pp.first; - i = pp.second; - } - else if( word == "if" ) // if - { - auto pp=parse_if(in, size, wp.second); - ret = pp.first; - i = pp.second; - } - else if( word == "for" ) - { - auto pp=parse_for(in, size, wp.second); - ret = pp.first; - i = pp.second; - } - else if( word == "while" ) - { - auto pp=parse_while(in, size, wp.second); - ret = pp.first; - i = pp.second; - } - else if( word == "until" ) - { - auto pp=parse_while(in, size, wp.second); - pp.first->real_condition()->negate(); - ret = pp.first; - i = pp.second; - } - else if(is_in_vector(word, out_reserved_words)) // is a reserved word - { - throw PARSE_ERROR( "Unexpected '"+word+"'" + expecting(g_expecting) , i); - } - // end reserved words - else if( word == "function" ) // bash style function - { - if(!g_bash) - throw PARSE_ERROR("bash specific: 'function'", i); - auto wp2=get_word(in, size, skip_unread(in, size, wp.second), VARNAME_END); - if(!valid_name(wp2.first)) - throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); - - i=skip_unread(in, size, wp2.second); - if(word_eq("()", in, size, i)) - i=skip_unread(in, size, i+2); - - auto pp = parse_function(in, size, i, "function definition"); - // function name - pp.first->name = wp2.first; - ret = pp.first; - i = pp.second; - } - else if(word_eq("()", in, size, skip_unread(in, size, wp.second))) // is a function - { - if(!valid_name(word)) - throw PARSE_ERROR( strf("Bad function name: '%s'", word.c_str()), start ); - - auto pp = parse_function(in, size, skip_unread(in, size, wp.second)+2); - // first arg is function name - pp.first->name = word; - ret = pp.first; - i = pp.second; - } - else // is a command - { - auto pp = parse_cmd(in, size, i); - ret = pp.first; - i = pp.second; - } - + auto pp = parse_case(newct); + ret = pp.first; + ctx = pp.second; } - - if(ret->type != block::block_cmd) + 
else if( word == "if" ) // if { - uint32_t j=skip_chars(in, size, i, SPACES); - auto pp=parse_arglist(in, size, j, false, &ret->redirs); // in case of redirects - if(pp.first != nullptr) - { - delete pp.first; - throw PARSE_ERROR("Extra argument after block", i); - } - i=pp.second; + auto pp=parse_if(newct); + ret = pp.first; + ctx = pp.second; } -#ifndef NO_PARSE_CATCH + else if( word == "for" ) + { + auto pp=parse_for(newct); + ret = pp.first; + ctx = pp.second; + } + else if( word == "while" ) + { + auto pp=parse_while(newct); + ret = pp.first; + ctx = pp.second; + } + else if( word == "until" ) + { + auto pp=parse_while(newct); + pp.first->real_condition()->negate(); + ret = pp.first; + ctx = pp.second; + } + else if(is_in_vector(word, out_reserved_words)) // is a reserved word + { + parse_error( strf("Unexpected '%s'", word.c_str())+expecting(ctx.expecting) , ctx); + } + // end reserved words + else if( word == "function" ) // bash style function + { + if(!ctx.bash) + { + parse_error("bash specific: 'function'", ctx); + newct.has_errored=true; + } + newct.i = skip_unread(newct); + auto wp2=get_word(newct, BASH_BLOCK_END); + if(!valid_name(wp2.first)) + { + parse_error( strf("Bad function name: '%s'", wp2.first.c_str()), newct ); + } + + newct.i = wp2.second; + newct.i=skip_unread(newct); + if(word_eq("()", newct)) + { + newct.i+=2; + newct.i=skip_unread(newct); + } + + auto pp = parse_function(newct, "function definition"); + // function name + pp.first->name = wp2.first; + ret = pp.first; + ctx = pp.second; + } + else if(word_eq("()", ctx.data, ctx.size, skip_unread(ctx.data, ctx.size, wp.second))) // is a function + { + if(!valid_name(word)) + { + parse_error( strf("Bad function name: '%s'", word.c_str()), ctx ); + newct.has_errored=true; + } + + newct.i = skip_unread(ctx.data, ctx.size, wp.second)+2; + auto pp = parse_function(newct); + // first arg is function name + pp.first->name = word; + ret = pp.first; + ctx = pp.second; + } + else // is a command + { 
+ auto pp = parse_cmd(ctx); + ret = pp.first; + ctx = pp.second; + } + } - catch(ztd::format_error& e) + + if(ret->type != block::block_cmd) { - if(ret != nullptr) delete ret; - throw e; + uint32_t j=skip_chars(ctx, SPACES); + ctx.i=j; + auto pp=parse_arglist(ctx, false, &ret->redirs); // in case of redirects + if(pp.first != nullptr) + { + delete pp.first; + parse_error("Extra argument after block", ctx); + pp.second.has_errored=true; + } + ctx=pp.second; } -#endif - return std::make_pair(ret,i); + return std::make_pair(ret,ctx); } // parse main -shmain* parse_text(const char* in, uint32_t size, std::string const& filename) +std::pair parse_text(parse_context ctx) { shmain* ret = new shmain(); - uint32_t i=0; -#ifndef NO_PARSE_CATCH - try + + ret->filename=ctx.filename; + // get shebang + if(word_eq("#!", ctx)) { -#endif - ret->filename=filename; - // get shebang - if(word_eq("#!", in, size, 0)) - { - i=skip_until(in, size, 0, "\n"); - ret->shebang=std::string(in, i); - } - i = skip_unread(in, size, i); - // do bash reading - std::string binshebang = basename(ret->shebang); - g_bash = binshebang == "bash" || binshebang == "lxsh"; - // parse all commands - auto pp=parse_list_until(in, size, i, 0); - ret->lst=pp.first; - i=pp.second; -#ifndef NO_PARSE_CATCH + ctx.i=skip_until(ctx, "\n"); + ret->shebang=std::string(ctx.data, ctx.i); } - catch(ztd::format_error& e) - { - delete ret; - throw ztd::format_error(e.what(), filename, e.data(), e.where()); - } -#endif - return ret; + ctx.i = skip_unread(ctx); + // do bash reading + std::string binshebang = basename(ret->shebang); + if(!ctx.bash) + ctx.bash = (binshebang == "bash" || binshebang == "lxsh"); + // parse all commands + auto pp=parse_list_until(ctx); + ret->lst=std::get<0>(pp); + ctx = std::get<1>(pp); + + if(ctx.has_errored) + throw std::runtime_error("Aborted due to previous errors"); + + return std::make_pair(ret, ctx); +} + +std::pair parse_text(std::string const& in, std::string const& filename) +{ + return 
parse_text({ .data=in.c_str(), .size=in.size(), .filename=filename.c_str()}); } // import a file's contents into a string diff --git a/src/processing.cpp b/src/processing.cpp index 2707653..0d25e21 100644 --- a/src/processing.cpp +++ b/src/processing.cpp @@ -7,6 +7,10 @@ #include "util.hpp" #include "shellcode.hpp" #include "struc_helper.hpp" +#include "options.hpp" +#include "minify.hpp" + +#include "errcodes.h" // Global regex @@ -415,6 +419,67 @@ std::set find_lxsh_commands(shmain* sh) return ret; } +std::set get_processors(std::string const& in) +{ + std::set ret; + if(in.size()>2 && in[0] == '\'' && in[in.size()-1] == '\'') + { + uint32_t i=1; + while(true) + { + std::string ln = in.substr(i, in.find('\n', i)-i); + if(ln.size()>1 && ln[0] == '#' && is_alphanum(ln[1])) + { + i+=ln.size(); + ret.insert(get_word(make_context(ln.substr(1)), SEPARATORS).first); + } + else + break; + } + } + return ret; +} + +bool r_do_string_processor(_obj* in) +{ + if(in->type == _obj::subarg_string) + { + string_subarg* t = dynamic_cast(in); + auto v = get_processors(t->val); + if(v.find("LXSH_PARSE_MINIFY") != v.end()) + { + try + { + std::string stringcode = t->val.substr(1, t->val.size()-2); + shmain* tsh = parse_text( stringcode ).first; + require_rescan_all(); + if(options["remove-unused"]) + delete_unused( tsh, re_var_exclude, re_fct_exclude ); + if(options["minify-quotes"]) + minify_quotes(tsh); + if(options["minify-var"]) + minify_var( tsh, re_var_exclude ); + if(options["minify-fct"]) + minify_fct( tsh, re_fct_exclude ); + require_rescan_all(); + t->val='\'' + tsh->generate(false, 0) + '\''; + } + catch(format_error& e) // if fail: skip processing + { + std::cerr << "Exception caused in string processing LXSH_PARSE_MINIFY\n"; + printFormatError(e); + exit(ERR_RUNTIME); + } + } + } + return true; +} + +void string_processors(_obj* in) +{ + recurse(r_do_string_processor, in); +} + /** JSON **/ std::string quote_string(std::string const& in) @@ -454,6 +519,7 @@ std::string 
boolstring(bool in) return "false"; } +#ifdef DEBUG_MODE std::string gen_json_struc(_obj* o) { if(o==nullptr) @@ -781,3 +847,4 @@ std::string gen_json_struc(_obj* o) } return gen_json(vec); } +#endif diff --git a/src/resolve.cpp b/src/resolve.cpp index 59a47a6..e1c4026 100644 --- a/src/resolve.cpp +++ b/src/resolve.cpp @@ -34,7 +34,6 @@ bool add_include(std::string const& file) if(it == truepath) return false; } - // std::cout << truepath << std::endl; included.push_back(truepath); return true; } @@ -60,7 +59,7 @@ void _cd(std::string const& dir) // -- COMMANDS -- // return [] -std::vector> do_include_raw(condlist* cmd, std::string const& filename, std::string* ex_dir) +std::vector> do_include_raw(condlist* cmd, parse_context ctx, std::string* ex_dir) { std::vector> ret; @@ -68,7 +67,7 @@ std::vector> do_include_raw(condlist* cmd, s std::vector rargs; try { - rargs = opts.process(cmd->first_cmd()->args->strargs(1), false, true, false); + rargs = opts.process(cmd->first_cmd()->args->strargs(1), {.stop_on_argument=true}); } catch(ztd::option_error& e) { @@ -78,7 +77,7 @@ std::vector> do_include_raw(condlist* cmd, s std::string dir; if(g_cd && !opts['C']) { - dir=_pre_cd(filename); + dir=_pre_cd(ctx.filename); if(ex_dir!=nullptr) *ex_dir=dir; } @@ -106,7 +105,7 @@ std::vector> do_include_raw(condlist* cmd, s } // -std::pair do_resolve_raw(condlist* cmd, std::string const& filename, std::string* ex_dir) +std::pair do_resolve_raw(condlist* cmd, parse_context ctx, std::string* ex_dir) { std::pair ret; @@ -114,7 +113,7 @@ std::pair do_resolve_raw(condlist* cmd, std::string co std::vector rargs; try { - rargs = opts.process(cmd->first_cmd()->args->strargs(1), false, true, false); + rargs = opts.process(cmd->first_cmd()->args->strargs(1), {.stop_on_argument=true} ); } catch(ztd::option_error& e) { @@ -124,7 +123,7 @@ std::pair do_resolve_raw(condlist* cmd, std::string co std::string dir; if(g_cd && !opts['C']) { - dir=_pre_cd(filename); + dir=_pre_cd(ctx.filename); 
if(ex_dir!=nullptr) *ex_dir=dir; } @@ -153,23 +152,33 @@ std::pair do_resolve_raw(condlist* cmd, std::string co return ret; } -std::vector do_include_parse(condlist* cmd, std::string const& filename) +std::vector do_include_parse(condlist* cmd, parse_context ctx) { std::vector ret; std::string dir; - auto incs=do_include_raw(cmd, filename, &dir); + auto incs=do_include_raw(cmd, ctx, &dir); - for(auto it: incs) + std::vector shs; + shs.resize(incs.size()); + + for(uint32_t i=0; ilst->cls.begin(), sh->lst->cls.end()); // safety and cleanup sh->lst->cls.resize(0); delete sh; } + shs.resize(0); // cd back _cd(dir); @@ -177,7 +186,7 @@ std::vector do_include_parse(condlist* cmd, std::string const& filena } // if first is nullptr: is a string -std::vector do_resolve_parse(condlist* cmd, std::string const& filename) +std::vector do_resolve_parse(condlist* cmd, parse_context ctx) { std::vector ret; @@ -186,10 +195,13 @@ std::vector do_resolve_parse(condlist* cmd, std::string const& filena { // get std::string dir; - p=do_resolve_raw(cmd, filename, &dir); + p=do_resolve_raw(cmd, ctx, &dir); + // do parse - shmain* sh = parse_text(p.second); - resolve(sh); + parse_context newctx = make_context(ctx, p.second, '`'+p.first+'`'); + auto pp = parse_text(newctx); + shmain* sh = pp.first; + resolve(sh, pp.second); // get the cls ret = sh->lst->cls; // safety and cleanup @@ -198,9 +210,9 @@ std::vector do_resolve_parse(condlist* cmd, std::string const& filena // cd back _cd(dir); } - catch(ztd::format_error& e) + catch(format_error& e) { - throw ztd::format_error(e.what(), '`'+p.first+'`', e.data(), e.where()); + throw format_error(e.what(), '`'+p.first+'`', e.data(), e.where()); } return ret; @@ -208,7 +220,7 @@ std::vector do_resolve_parse(condlist* cmd, std::string const& filena // -- OBJECT CALLS -- -std::pair< std::vector , bool > resolve_condlist(condlist* in, std::string const& filename) +std::pair< std::vector , bool > resolve_condlist(condlist* in, parse_context ctx) { cmd* 
tc = in->first_cmd(); if(tc == nullptr) @@ -217,14 +229,14 @@ std::pair< std::vector , bool > resolve_condlist(condlist* in, std::s std::string const& strcmd=tc->arg_string(0); if(g_include && strcmd == "%include") - return std::make_pair(do_include_parse(in, filename), true); + return std::make_pair(do_include_parse(in, ctx), true); else if(g_resolve && strcmd == "%resolve") - return std::make_pair(do_resolve_parse(in, filename), true); + return std::make_pair(do_resolve_parse(in, ctx), true); else return std::make_pair(std::vector(), false); } -std::pair< std::vector , bool > resolve_arg(arg* in, std::string const& filename, bool forcequote=false) +std::pair< std::vector , bool > resolve_arg(arg* in, parse_context ctx, bool forcequote=false) { std::vector ret; if(in == nullptr) @@ -250,12 +262,12 @@ std::pair< std::vector , bool > resolve_arg(arg* in, std::string const& fi std::string fulltext; if(g_include && strcmd == "%include") { - for(auto it: do_include_raw(tc, filename) ) + for(auto it: do_include_raw(tc, ctx) ) fulltext += it.second; } else if(g_resolve && strcmd == "%resolve") { - fulltext = do_resolve_raw(tc, filename).second; + fulltext = do_resolve_raw(tc, ctx).second; } else // skip continue; @@ -321,10 +333,10 @@ std::pair< std::vector , bool > resolve_arg(arg* in, std::string const& fi return std::make_pair(ret, has_resolved); } - +void resolve(_obj* in, parse_context* ctx); // -- RECURSIVE CALL -- -bool r_resolve(_obj* o, std::string* filename) +bool r_resolve(_obj* o, parse_context* ct) { switch(o->type) { @@ -337,7 +349,7 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(uint32_t i=0 ; icls.size() ; i++) { - auto r=resolve_condlist(t->cls[i], *filename); + auto r=resolve_condlist(t->cls[i], *ct); if(r.second) { // add new cls after current @@ -350,7 +362,7 @@ bool r_resolve(_obj* o, std::string* filename) } else { - resolve(t->cls[i], filename); + resolve(t->cls[i], ct); } } return false; @@ -360,7 +372,7 @@ bool 
r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(uint32_t i=0 ; isize() ; i++) { - auto r=resolve_arg(t->args[i], *filename); + auto r=resolve_arg(t->args[i], *ct); if(r.first.size()>0) { // add new args @@ -372,7 +384,7 @@ bool r_resolve(_obj* o, std::string* filename) } else { - resolve(t->args[i], filename); + resolve(t->args[i], ct); } } return false; @@ -382,12 +394,12 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(auto it: t->var_assigns) // var assigns { - resolve_arg(it.second, *filename, true); // force quoted - resolve(it.second, filename); + resolve_arg(it.second, *ct, true); // force quoted + resolve(it.second, ct); } for(auto it: t->redirs) - resolve(it, filename); - resolve(t->args, filename); + resolve(it, ct); + resolve(t->args, ct); return false; }; break; case _obj::block_case : @@ -395,15 +407,15 @@ bool r_resolve(_obj* o, std::string* filename) auto t = dynamic_cast(o); for(auto sc: t->cases) { - resolve_arg(t->carg, *filename, true); // force quoted - resolve(t->carg, filename); + resolve_arg(t->carg, *ct, true); // force quoted + resolve(t->carg, ct); for(auto it: sc.first) { - resolve_arg(it, *filename, true); // force quoted - resolve(it, filename); + resolve_arg(it, *ct, true); // force quoted + resolve(it, ct); } - resolve(sc.second, filename); + resolve(sc.second, ct); } }; break; default: break; @@ -412,12 +424,13 @@ bool r_resolve(_obj* o, std::string* filename) } // recursive call of resolve -void resolve(_obj* in, std::string* filename) +void resolve(_obj* in, parse_context* ctx) { - recurse(r_resolve, in, filename); + recurse(r_resolve, in, ctx); } -void resolve(shmain* sh) +// recursive call of resolve +void resolve(_obj* in, parse_context ctx) { - recurse(r_resolve, sh, &sh->filename); + recurse(r_resolve, in, &ctx); } diff --git a/src/shellcode.cpp b/src/shellcode.cpp index 0d5a563..84e7edf 100644 --- a/src/shellcode.cpp +++ b/src/shellcode.cpp @@ -4,13 +4,14 @@ #include 
"processing.hpp" #include "struc_helper.hpp" -const std::map lxsh_extend_fcts = { - { "_lxsh_random", { "[K]", "Generate a random number between 0 and 2^(k*8). Default 2", RANDOM_SH} }, - { "_lxsh_random_string", { "[N]", "Generate a random alphanumeric string of length N. Default 20", RANDOM_STRING_SH} }, - { "_lxsh_random_tmpfile", { "[N]", "Get a random TMP filepath, with N random chars. Default 20", RANDOM_TMPFILE_SH, {"_lxsh_random_string"} } } +const std::map lxsh_extend_fcts = { + { "_lxsh_random", { "[K]", "Generate a random number between 0 and 2^(K*8). Default 2", RANDOM_SH} }, + { "_lxsh_random_string", { "[N]", "Generate a random alphanumeric string of length N. Default 20", RANDOM_STRING_SH} }, + { "_lxsh_random_tmpfile", { "[N]", "Get a random TMP filepath, with N random chars. Default 20", RANDOM_TMPFILE_SH, {"_lxsh_random_string"} } + } }; -const std::map lxsh_array_fcts = { +const std::map lxsh_array_fcts = { { "_lxsh_array_create", { "", "Create an array out of input arguments", ARRAY_CREATE_SH} }, { "_lxsh_array_get", { " ", "Get value from array", ARRAY_GET_SH} }, { "_lxsh_array_set", { " ", "Set value of array", ARRAY_SET_SH} }, @@ -19,7 +20,7 @@ const std::map lxsh_array_fcts = { { "_lxsh_map_set", { " ", "Set value of map", MAP_SET_SH} } }; -std::map create_allfcts() +std::map create_allfcts() { auto r = lxsh_array_fcts; for(auto it: lxsh_extend_fcts) @@ -27,7 +28,7 @@ std::map create_allfcts() return r; } -const std::map lxsh_allfcts = create_allfcts(); +const std::map lxsh_allfcts = create_allfcts(); void add_lxsh_fcts(shmain* sh, std::set fcts) { diff --git a/src/struc_helper.cpp b/src/struc_helper.cpp index 68b5cc2..85235c1 100644 --- a/src/struc_helper.cpp +++ b/src/struc_helper.cpp @@ -9,7 +9,7 @@ arg* make_arg(std::string const& in) { - return parse_arg(in.c_str(), in.size(), 0).first; + return parse_arg(make_context(in)).first; } cmd* make_cmd(std::vector const& args) @@ -34,7 +34,7 @@ cmd* make_cmd(std::vector const& args) cmd* 
make_cmd(std::string const& in) { - return parse_cmd(in.c_str(), in.size(), 0).first; + return parse_cmd(make_context(in)).first; } pipeline* make_pipeline(std::vector const& bls) @@ -48,22 +48,23 @@ pipeline* make_pipeline(std::vector const& bls) pipeline* make_pipeline(std::string const& in) { - return parse_pipeline(in.c_str(), in.size(), 0).first; + return parse_pipeline(make_context(in)).first; } condlist* make_condlist(std::string const& in) { - return parse_condlist(in.c_str(), in.size(), 0).first; + return parse_condlist(make_context(in)).first; } list* make_list(std::string const& in) { - return parse_list_until(in.c_str(), in.size(), 0, 0).first; + auto t = parse_list_until(make_context(in)); + return std::get<0>(t); } block* make_block(std::string const& in) { - return parse_block(in.c_str(), in.size(), 0).first; + return parse_block(make_context(in)).first; } @@ -71,7 +72,7 @@ block* make_block(std::string const& in) arg* copy(arg* in) { std::string str = in->generate(0); - return parse_arg(str.c_str(), str.size(), 0).first; + return parse_arg(make_context(str)).first; } // modifiers @@ -147,20 +148,47 @@ size_t cmd::arglist_size() // string getters +bool arg::is_string() +{ + return sa.size() == 1 && sa[0]->type == _obj::subarg_string; +} + std::string arg::string() { - if(sa.size() != 1 || sa[0]->type != subarg::subarg_string) + if(!this->is_string()) return ""; return dynamic_cast(sa[0])->val; } std::string arg::first_sa_string() { - if(sa.size() <=0 || sa[0]->type != subarg::subarg_string) - return ""; + if(sa.size() <=0 || sa[0]->type != _obj::subarg_string) + return ""; return dynamic_cast(sa[0])->val; } +bool arg::can_expand() +{ + for(auto it: sa) + { + if(it->type != _obj::subarg_string && !it->quoted) + return true; + } + return false; +} + +bool arglist::can_expand() +{ + bool arg_expands=false; + for(auto it: args) + { + arg_expands = it->can_expand(); + if(arg_expands) + return true; + } + return false; +} + std::vector 
arglist::strargs(uint32_t start) { std::vector ret; diff --git a/src/util.cpp b/src/util.cpp index dd3d5c4..a0a87fa 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -7,8 +7,10 @@ #include #include +#include #include +#include std::string indenting_string="\t"; @@ -210,16 +212,15 @@ std::string repeatString(std::string const& str, uint32_t n) return ret; } -void printFormatError(ztd::format_error const& e, bool print_line) +void printFormatError(format_error const& e, bool print_line) { - printErrorIndex(e.data(), e.where(), e.what(), e.origin(), print_line); -} + const char* in = e.data(); -void printErrorIndex(const char* in, const int index, const std::string& message, const std::string& origin, bool print_line) -{ - int i=0, j=0; // j: last newline - int line=1; //n: line # - int in_size=strlen(in); + uint64_t index = e.where(); + + uint64_t i=0, j=0; // j: last newline + uint64_t line=1; //n: line # + uint64_t in_size=strlen(in); if(index >= 0) { while(i < in_size && i < index) @@ -232,59 +233,25 @@ void printErrorIndex(const char* in, const int index, const std::string& message i++; } while(i < in_size && in[i]!='\n') - { i++; - } } - if(origin != "") + std::cerr << ztd::color::b_white; + fprintf(stderr, "%s:%lu:%lu: ", e.origin(), line, index-j+1); + + ztd::color level_color; + const std::string& level = e.level(); + if(level == "error") + level_color = ztd::color::b_red; + else if(level == "warning") + level_color = ztd::color::b_magenta; + else if(level == "info") + level_color = ztd::color::b_cyan; + + std::cerr << level_color << e.level() << ztd::color::none; + fprintf(stderr, ": %s\n", e.what()); + if(print_line) { - fprintf(stderr, "%s:%u:%u: %s\n", origin.c_str(), line, index-j+1, message.c_str()); - if(print_line) - { - std::cerr << std::string(in+j, i-j) << std::endl; - std::cerr << repeatString(" ", index-j) << '^' << std::endl; - } + std::cerr << std::string(in+j, i-j) << std::endl; + std::cerr << repeatString(" ", index-j) << '^' << std::endl; } } 
- - -int execute(shmain* sh, std::vector& args) -{ - std::string data=sh->generate(); - - std::string filename = basename(args[0]); - - // generate path - std::string tmpdir = (getenv("TMPDIR") != NULL) ? getenv("TMPDIR") : "/tmp" ; - std::string dirpath = tmpdir + "/lxsh_" + ztd::sh("tr -dc '[:alnum:]' < /dev/urandom | head -c10"); - std::string filepath = dirpath+'/'+filename; - - // create dir - if(ztd::exec("mkdir", "-p", dirpath).second) - { - throw std::runtime_error("Failed to create directory '"+dirpath+'\''); - } - - // create stream - std::ofstream stream(filepath); - if(!stream) - { - ztd::exec("rm", "-rf", dirpath); - throw std::runtime_error("Failed to write to '"+filepath+'\''); - } - - // output - stream << data; - stream.close(); - if(ztd::exec("chmod", "+x", filepath).second != 0) - { - ztd::exec("rm", "-rf", dirpath); - throw std::runtime_error("Failed to make '"+filepath+"' executable"); - } - - // exec - int retval=_exec(filepath, args); - ztd::exec("rm", "-rf", dirpath); - - return retval; -}