#!/bin/sh
#
# Generate include/g_shellcode.h from the scripts in shellcode/.
#
# Every shellcode/NAME.sh becomes one C string macro NAME_SH holding the
# (optionally minimized) script text. The header is only replaced when its
# content actually changes, so its timestamp stays stable and make does not
# rebuild the objects depending on it for nothing.
#
# Must be run from the repository root (paths below are relative).

file=include/g_shellcode.h
# PID suffix: concurrent builds must not write into the same temp file
tmpfile=${TMPDIR-/tmp}/lxsh_shellcodegen.$$
codedir=shellcode

# never leave the temp file behind, whatever the exit path
trap 'rm -f "$tmpfile"' EXIT

# Print a minimized version of a script on stdout.
# Uses lxsh if it is on PATH, else shfmt, else emits the file unchanged.
# $1 = file
minimize() {
  # command -v is the POSIX way to probe for a command (which is not)
  if command -v lxsh >/dev/null 2>&1 ; then
    lxsh -m "$1"
  elif command -v shfmt >/dev/null 2>&1 ; then
    shfmt -mn "$1"
  else
    cat "$1"
  fi
}

# Turn stdin into the inside of a C string literal:
# escape backslashes and double quotes, then join all lines with a literal \n
to_cstr() {
  sed 's|\\|\\\\|g;s|\"|\\\"|g' | sed ':a;N;$!ba;s/\n/\\n/g;'
}

{
  # the guard must be unique to this header: reusing the g_version.h guard
  # would silently drop whichever of the two headers is included second
  echo '#ifndef G_SHELLCODE_H'
  echo '#define G_SHELLCODE_H'
  for I in "$codedir"/*.sh
  do
    # an unmatched glob stays literal: skip it instead of emitting garbage
    [ -f "$I" ] || continue
    # character classes are quoted so the shell cannot glob-expand them
    printf '#define %s "%s"\n' "$(basename "$I" | tr '[:lower:]' '[:upper:]' | tr '.' '_')" "$(minimize "$I" | to_cstr)"
  done
  echo '#endif'
} > "$tmpfile" || exit 1

# cmp -s is POSIX and needs no hashing; the -f test covers the first run,
# when the header does not exist yet
if [ -f "$file" ] && cmp -s "$tmpfile" "$file" ; then
  rm -f "$tmpfile"
else
  mv "$tmpfile" "$file"
fi
# Read from a map stored as newline-separated "key]value" entries.
# $1 = map content
# $2 = key, or * / @ to get every value (sorted by entry)
# Prints the value(s) of the matching entries, one per line.
__lxsh_map_get() {
  if [ "$2" = \* ] || [ "$2" = @ ] ; then
    printf "%s\n" "$1" | sort | cut -d ']' -f2-
  else
    # The key is compared literally: the quoted part of a case pattern is
    # never interpreted, whereas feeding the key to grep as a regex made
    # keys such as "a.b" or "x*" match unrelated entries.
    printf "%s\n" "$1" | while IFS= read -r __lxsh_line ; do
      case "$__lxsh_line" in
        "$2]"*) printf "%s\n" "$__lxsh_line" ;;
      esac
    done | cut -d ']' -f2-
  fi
}
get_declare_opt(cmd* in) +{ + if(in->var_assigns[0].second!=nullptr) + { + return in->var_assigns[0].second->string(); + } + return ""; +} + +bool debashify_declare(list* in, debashify_params* params) +{ + bool has_found=false; + for(uint32_t i=0; icls.size(); i++) + { + // not a cmd: go to next + if(in->cls[i]->pls[0]->cmds[0]->type != _obj::block_cmd) + continue; + + cmd* c1 = dynamic_cast(in->cls[i]->pls[0]->cmds[0]); + std::string const& cmdstr=c1->arg_string(0); + if(cmdstr == "declare" || cmdstr == "typeset" || cmdstr == "readonly") + { + if(cmdstr == "readonly") + { + warn("removing 'readonly'"); + } + else + { + std::string const& op = get_declare_opt(c1); + if(op == "-a") + { + for(auto it: c1->var_assigns) + { + if(it.first != nullptr) + params->arrays[it.first->varname] = false; + } + } + else if(op == "-A") + { + for(auto it: c1->var_assigns) + { + if(it.first != nullptr) + params->arrays[it.first->varname] = true; + } + } + else + warn( strf("removing '%s' with argument '%s'", cmdstr.c_str(), op.c_str()) ); + } + has_found=true; + delete in->cls[i]; + in->cls.erase(in->cls.begin()+i); + } + } + return has_found; +} + +cmd* make_cmd_varindex(std::string const& strcmd, std::string const& varname, arg* index) { cmd* c = new cmd(new arglist); - // __lxsh_array_get - c->args->add( new arg("__lxsh_array_get") ); - // __lxsh_array_get "$VAR" + // cmd + c->args->add( new arg(strcmd) ); + // cmd "$VAR" c->args->add( make_arg("\"$"+varname+"\"") ); - // __lxsh_array_get "$VAR" N + // cmd "$VAR" N c->args->add( index ); return c; } -subshell_arithmetic* do_debashify_arithmetic(arithmetic* in) +subshell_arithmetic* do_debashify_arithmetic(arithmetic* in, debashify_params* params) { subshell_arithmetic* ret = nullptr; if(in->type == _obj::arithmetic_variable) @@ -169,7 +229,18 @@ subshell_arithmetic* do_debashify_arithmetic(arithmetic* in) arg* index = t->var->index; t->var->index=nullptr; - cmd* c = make_array_get_cmd(varname, index); + cmd* c; + 
if(params->arrays[varname]) + { + c = make_cmd_varindex("__lxsh_map_get", varname, index); + params->need_map_get=true; + } + else + { + c = make_cmd_varindex("__lxsh_array_get", varname, index); + params->need_array_get=true; + } + ret = new subshell_arithmetic(new subshell(c)); } } @@ -183,7 +254,7 @@ bool debashify_array_arithmetic(_obj* o, debashify_params* params) { case _obj::subarg_arithmetic: { arithmetic_subarg* t = dynamic_cast(o); - arithmetic* r = do_debashify_arithmetic(t->arith); + arithmetic* r = do_debashify_arithmetic(t->arith, params); if(r!=nullptr) { ret=true; @@ -193,14 +264,14 @@ bool debashify_array_arithmetic(_obj* o, debashify_params* params) } break; case _obj::arithmetic_operation: { operation_arithmetic* t = dynamic_cast(o); - arithmetic* r = do_debashify_arithmetic(t->val1); + arithmetic* r = do_debashify_arithmetic(t->val1, params); if(r!=nullptr) { ret=true; delete t->val1; t->val1 = r; } - r = do_debashify_arithmetic(t->val2); + r = do_debashify_arithmetic(t->val2, params); if(r!=nullptr) { ret=true; @@ -210,7 +281,7 @@ bool debashify_array_arithmetic(_obj* o, debashify_params* params) } break; case _obj::arithmetic_parenthesis: { parenthesis_arithmetic* t = dynamic_cast(o); - arithmetic* r = do_debashify_arithmetic(t->val); + arithmetic* r = do_debashify_arithmetic(t->val, params); if(r!=nullptr) { ret=true; @@ -237,7 +308,6 @@ bool debashify_array_get(arg* in, debashify_params* params) if(t->var->manip != nullptr) throw std::runtime_error("Cannot debashify manipulations on ${VAR[]}"); - params->need_array_get = true; std::string varname = t->var->varname; arg* index = t->var->index; t->var->index=nullptr; @@ -248,7 +318,18 @@ bool debashify_array_get(arg* in, debashify_params* params) index = new arg("\\*"); } - cmd* c = make_array_get_cmd(varname, index); + cmd* c; + if(params->arrays[varname]) + { + c = make_cmd_varindex("__lxsh_map_get", varname, index); + params->need_map_get=true; + } + else + { + c = 
make_cmd_varindex("__lxsh_array_get", varname, index); + params->need_array_get=true; + } + subshell_subarg* sb = new subshell_subarg(new subshell(c)); sb->quoted=quoted; delete *it; @@ -265,19 +346,28 @@ bool debashify_array_set(cmd* in, debashify_params* params) bool has_replaced=false; for(auto it = in->var_assigns.begin() ; it != in->var_assigns.end() ; it++) { - if(it->second != nullptr && it->second->size()>0 && it->second->first_sa_string().substr(0,2) == "=(") + if(it->first!=nullptr && it->second != nullptr && it->second->size()>0 && it->second->first_sa_string().substr(0,2) == "=(") { // array creation: VAR=() - params->need_array_create=true; // extract arguments from =(ARGS...) std::string gen=it->second->generate(0); + std::string varname=it->first->varname; gen=gen.substr(2); gen.pop_back(); // create cmd out of arguments arglist* args = parse_arglist( gen.c_str(), gen.size(), 0 ).first; cmd* c = new cmd(args); - // cmd first argument is __lxsh_array_create - c->args->insert(0, new arg("__lxsh_array_create") ); + // cmd first argument is __lxsh_X_create + if(params->arrays[varname]) + { + c->args->insert(0, new arg("__lxsh_map_create") ); + params->need_map_create=true; + } + else + { + c->args->insert(0, new arg("__lxsh_array_create") ); + params->need_array_create=true; + } subshell_subarg* sb = new subshell_subarg(new subshell(c)); // insert new value delete it->second; @@ -303,19 +393,40 @@ bool debashify_array_set(cmd* in, debashify_params* params) if(tt->val.substr(0,2) == "+=") { tt->val = tt->val.substr(2); // remove += + // create array get of value - cmd* c = make_array_get_cmd(varname, copy(index)); + cmd* c; + if(params->arrays[varname]) + { + c = make_cmd_varindex("__lxsh_map_get", varname, copy(index)); + params->need_map_get=true; + } + else + { + c = make_cmd_varindex("__lxsh_array_get", varname, copy(index)); + params->need_array_get=true; + } subshell_subarg* sb = new subshell_subarg(new subshell(c)); sb->quoted=true; 
value->insert(0, "\""); value->insert(0, sb); value->insert(0, "\""); + } else tt->val = tt->val.substr(1); // remove = cmd* c = new cmd(new arglist); - c->args->add( new arg("__lxsh_array_set") ); + if(params->arrays[varname]) + { + c->args->add(new arg("__lxsh_map_set") ); + params->need_map_set=true; + } + else + { + c->args->add(new arg("__lxsh_array_set") ); + params->need_array_set=true; + } // __lxsh_array_set "$VAR" c->args->add( make_arg("\"$"+varname+"\"") ); // __lxsh_array_set "$VAR" N @@ -345,7 +456,16 @@ bool debashify_array_set(cmd* in, debashify_params* params) arglist* args = parse_arglist( gen.c_str(), gen.size(), 0 ).first; cmd* c = new cmd(args); // cmd first argument is __lxsh_array_create - c->args->insert(0, new arg("__lxsh_array_create") ); + if(params->arrays[varname]) + { + throw std::runtime_error("Cannot debashify VAR+=() on associative arrays"); + } + else + { + c->args->insert(0, new arg("__lxsh_array_create") ); + params->need_array_create=true; + } + // second arg is varname c->args->insert(1, make_arg("\"$"+varname+"\"") ); subshell_subarg* sb = new subshell_subarg(new subshell(c)); // insert new value @@ -523,38 +643,6 @@ bool debashify_procsub(list* lst, debashify_params* params) return has_replaced; } -// create the random string generator function -// -block* create_random_string_func() -{ - std::string code="__lxsh_random_string() { env LC_CTYPE=C tr -dc 'a-zA-Z0-9' (o); + debashify_declare(t, params); debashify_procsub(t, params); } break; case _obj::_pipeline: { @@ -577,7 +666,6 @@ bool r_debashify(_obj* o, debashify_params* params) case _obj::block_cmd: { cmd* t = dynamic_cast(o); debashify_combined_redirects(t); - debashify_declare(t); debashify_array_set(t, params); debashify_plusequal(t, params); } break; @@ -624,13 +712,19 @@ void debashify(shmain* sh) sh->shebang = "#!/bin/sh"; recurse(r_debashify, sh, ¶ms); if(params.need_random_string || params.need_random_tmpfile) - sh->lst->insert(0, new 
condlist(create_random_tmpfile_func())); + sh->lst->insert(0, new condlist(make_block(RANDOM_TMPFILE_SH))); if(params.need_random_tmpfile) - sh->lst->insert(0, new condlist(create_random_string_func())); + sh->lst->insert(0, new condlist(make_block(RANDOM_STRING_SH))); if(params.need_array_create) - sh->lst->insert(0, new condlist(create_array_create_func())); + sh->lst->insert(0, new condlist(make_block(ARRAY_CREATE_SH))); if(params.need_array_set) - sh->lst->insert(0, new condlist(create_array_set_func())); + sh->lst->insert(0, new condlist(make_block(ARRAY_SET_SH))); if(params.need_array_get) - sh->lst->insert(0, new condlist(create_array_get_func())); + sh->lst->insert(0, new condlist(make_block(ARRAY_GET_SH))); + if(params.need_map_create) + sh->lst->insert(0, new condlist(make_block(MAP_CREATE_SH))); + if(params.need_map_set) + sh->lst->insert(0, new condlist(make_block(MAP_SET_SH))); + if(params.need_map_get) + sh->lst->insert(0, new condlist(make_block(MAP_GET_SH))); } diff --git a/src/minimize.cpp b/src/minimize.cpp index dee33bb..189313a 100644 --- a/src/minimize.cpp +++ b/src/minimize.cpp @@ -39,7 +39,7 @@ bool r_replace_fct(_obj* in, strmap_t* fctmap) }; break; case _obj::block_cmd: { cmd* t = dynamic_cast(in); - std::string cmdname = t->firstarg_string(); + std::string cmdname = t->arg_string(0); auto el=fctmap->find(cmdname); if(el!=fctmap->end()) { diff --git a/src/processing.cpp b/src/processing.cpp index e1267a1..0ceb372 100644 --- a/src/processing.cpp +++ b/src/processing.cpp @@ -164,7 +164,7 @@ bool cmd::is_argvar() bool cmd::is(std::string const& in) { - return in == this->firstarg_string(); + return in == this->arg_string(0); } /** GETTERS **/ @@ -303,7 +303,7 @@ bool r_get_cmd(_obj* in, countmap_t* all_cmds) { case _obj::block_cmd: { cmd* t = dynamic_cast(in); - std::string cmdname = t->firstarg_string(); + std::string cmdname = t->arg_string(0); if(cmdname != "" && !all_cmds->insert( std::make_pair(cmdname, 1) ).second) 
(*all_cmds)[cmdname]++; }; break; diff --git a/src/resolve.cpp b/src/resolve.cpp index 0566955..9a2d0be 100644 --- a/src/resolve.cpp +++ b/src/resolve.cpp @@ -213,7 +213,7 @@ std::pair< std::vector , bool > resolve_condlist(condlist* in, shmain if(tc == nullptr) return std::make_pair(std::vector(), false); - std::string const& strcmd=tc->firstarg_string(); + std::string const& strcmd=tc->arg_string(0); if(g_include && strcmd == "%include") return std::make_pair(do_include_parse(in, parent), true); @@ -245,7 +245,7 @@ std::pair< std::vector , bool > resolve_arg(arg* in, shmain* parent, bool cmd* c = tc->first_cmd(); if(c == nullptr) // skip if not cmd continue; - std::string strcmd=c->firstarg_string(); + std::string strcmd=c->arg_string(0); std::string fulltext; if(g_include && strcmd == "%include") { diff --git a/src/struc_helper.cpp b/src/struc_helper.cpp index 6b96374..68e18a5 100644 --- a/src/struc_helper.cpp +++ b/src/struc_helper.cpp @@ -61,6 +61,12 @@ list* make_list(std::string const& in) return parse_list_until(in.c_str(), in.size(), 0, 0).first; } +block* make_block(std::string const& in) +{ + return parse_block(in.c_str(), in.size(), 0).first; +} + + // copy arg* copy(arg* in) { @@ -152,10 +158,10 @@ std::vector arglist::strargs(uint32_t start) return ret; } -std::string const& cmd::firstarg_string() +std::string const& cmd::arg_string(uint32_t n) { - if(args!=nullptr && args->args.size()>0 && args->args[0]->sa.size() == 1 && args->args[0]->sa[0]->type == _obj::subarg_string) - return dynamic_cast(args->args[0]->sa[0])->val; + if(args!=nullptr && args->args.size()>n && args->args[n]->sa.size() == 1 && args->args[n]->sa[0]->type == _obj::subarg_string) + return dynamic_cast(args->args[n]->sa[0])->val; return cmd::empty_string; }