This commit is contained in:
zawz 2023-02-22 15:22:49 +01:00
commit 4637861cb9
13 changed files with 740 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
TODO
Zmakefile
/backups
/diffstore

25
Makefile Normal file
View file

@ -0,0 +1,25 @@
# Variable and function name patterns handed to lxsh through --exclude-var /
# --exclude-fct for the -M build. NOTE(review): presumably these are the names
# that are overridden from the environment or called through computed names
# (cmp_$COMPRESSION_TYPE, clean_$type, ...) — confirm against the lxsh docs.
var_exclude = DIFF_ENGINE COMPRESSION_TYPE COMPRESSION_LEVEL RETENTION_TYPE RETENTION_AMOUNT DEBUG ENCRYPTION
fct_exclude = diff_.* patch_.* cmp_.* dcmp_.* clean_.*

# These targets are commands, not files: without this declaration a stray file
# named e.g. "build" or "clear" would make them silently do nothing.
.PHONY: debug bash build install uninstall clear

# Default goal: the release build of the script.
diffstore: src/*
	lxsh -o diffstore -M --exclude-var "$(var_exclude)" --exclude-fct "$(fct_exclude)" src/main.sh

# Same output file, built without the -M / exclude options.
debug: src/*
	lxsh -o diffstore src/main.sh

# Same output file, built with lxsh --bash.
bash: src/*
	lxsh --bash -o diffstore src/main.sh

build: diffstore

install: build
	mv diffstore /usr/local/bin
	cp completion/diffstore.bash /etc/bash_completion.d

# -f: removing something that is already gone is not an error.
uninstall:
	rm -f /usr/local/bin/diffstore
	rm -f /etc/bash_completion.d/diffstore.bash

clear:
	rm -f diffstore

25
README.md Normal file
View file

@ -0,0 +1,25 @@
# diffstore
Utility to store files using differentials.
This tool is mostly intended as a file backup history.
It can achieve much higher compression ratios while still being decently fast on compression.
However, retrieving older backups takes much longer.
This should only be used to store either very similar files or a history of one file,
in the case where files are too different, it can result in more disk usage.
This means input files have to be uncompressed and unencrypted.
### Example configs
- rdiff + zstd -3 : best compression/time ratio
- diff + gzip : best compatibility
- rdiff + xz -9 : best compression
## Building
Requires [lxsh](https://git.zawz.net/zawz/lxsh)
```sh
make
```

36
completion/diffstore.bash Normal file
View file

@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Completion function for: diffstore <FOLDER> <OPERATION> [ARGS]
#   $1 = name of the command being completed , $2 = word being completed
# Fills COMPREPLY with the candidates for the current word:
#   word 1 -> directories
#   word 2 -> operation names
#   word 3 -> stored file names, only for the operations listed in _cw2_val
_diffstore_completion()
{
    local _cw2=(init list add store get clean config)
    local _cw2_val=(get)
    local cur=$2
    local N=0
    local I
    local WORDS=()

    COMPREPLY=()
    if [ "$COMP_CWORD" -eq 1 ] ; then
        # Keep whatever _filedir put in COMPREPLY: falling through would
        # overwrite it with the (empty) WORDS list when $cur is empty.
        _filedir -d
        return 0
    elif [ "$COMP_CWORD" -eq 2 ] ; then
        WORDS=("${_cw2[@]}")
    elif [ "$COMP_CWORD" -eq 3 ] && echo "${_cw2_val[*]}" | grep -qw -- "${COMP_WORDS[2]}" ; then
        # Ask the tool itself for the stored names; its error output must not
        # end up on the command line being edited.
        while read -r I ; do
            WORDS+=("$I")
        done < <("$1" "${COMP_WORDS[1]}" list basic 2>/dev/null)
    fi

    if [ -n "$cur" ] ; then
        # Keep only the candidates that start with the typed prefix
        for I in "${WORDS[@]}" ; do
            [ "$I" != "${I#"$cur"}" ] && COMPREPLY[N++]=$I
        done
    else
        COMPREPLY=("${WORDS[@]}")
    fi
}
# Register the completion function for the diffstore command.
# -o dirnames: bash falls back to directory name completion when the function
# produces no candidates; -o nospace: no space is appended after a completion.
complete -o nospace -F _diffstore_completion -o dirnames diffstore

35
src/clean.sh Normal file
View file

@ -0,0 +1,35 @@
# Apply a retention policy to a store by dispatching to clean_<type>.
# $1 = dir , $2 = type (falls back to $RETENTION_TYPE) , $3 = amount
clean() {
    dir=$1
    shift 1
    if [ $# -gt 0 ] ; then
        # Explicit type given on the command line
        type=$1
        shift 1
    else
        type=$RETENTION_TYPE
    fi
    # Whatever is left (the optional amount) goes to the handler
    clean_$type "$dir" "$@"
}
## clean_TYPE ##
# $1 = dir , $2 = amount

# Keep only the newest N entries of the store and remove the others.
# $1 = dir , $2 = amount (defaults to $RETENTION_AMOUNT, then 14)
# The entry list is the numbered diffs in ascending order followed by the
# latest-* file; everything except the last N lines of that list is removed.
# Never fails: having nothing to remove is the normal case.
clean_number() {
    rmopts=
    [ "$LOG" = true ] && rmopts=-v
    {
        file_difflist "$1" && file_latest "$1"
    } | head -n "-${2:-${RETENTION_AMOUNT:-14}}" | tr '\n' '\0' | ( cd "$1" && xargs -0 rm $rmopts 2>/dev/null ) || true
}
# Remove store entries (numbered diffs and latest-*) whose modification time
# is older than N days.
# $1 = dir , $2 = amount of days (defaults to $RETENTION_AMOUNT, then 14)
clean_days() {
    rmopts=
    [ "$LOG" = true ] && rmopts=-v
    (
        # Never run the deleting find in the wrong directory
        cd "$1" || exit 1
        days=${2:-${RETENTION_AMOUNT:-14}}
        # "|| true": finding nothing to delete is not an error
        find . -mindepth 1 -maxdepth 1 -mtime "+$days" -regex '\./[0-9]*-.*' -print0 | xargs -0 rm $rmopts 2>/dev/null || true
        find . -mindepth 1 -maxdepth 1 -mtime "+$days" -name 'latest-*' -print0 | xargs -0 rm $rmopts 2>/dev/null || true
    )
}
# Retention type "all": nothing is ever removed.
clean_all() {
    :
}

92
src/compression.sh Normal file
View file

@ -0,0 +1,92 @@
#!/bin/sh
# Table of compression types, one "name:extension" entry per line.
# compression_extension looks the extension up by name; the same name is used
# to build the cmp_<name> / dcmp_<name> function calls.
comptypes='
gzip:gz
xz:xz
zstd:zst
'
# Print the file extension (without leading dot) of a compression type.
# $1 = compression (defaults to $COMPRESSION_TYPE)
# Prints nothing when the type is not listed in $comptypes.
compression_extension() {
    local wanted=${1-$COMPRESSION_TYPE}
    printf "%s\n" "$comptypes" | grep -o "^$wanted:.*$" | sed 's/^[^:]*://'
}
# Compress (and encrypt, when enabled) a file in place: writes
# "<file>.<compression ext><crypt ext>" and removes the input on success.
# $1 = file
compress() {
    if compress_stdout "$1" > "$1.$(compression_extension)$(crypt_extension)" ; then
        rm "$1"
    else
        # Hand the failure status of compress_stdout back to the caller
        return $?
    fi
}
# Stream a file through the configured compressor and then encrypt, writing
# the result to stdout.
# $1 = file
# The file is read with pv unless NOBAR=true, in which case plain cat is used.
compress_stdout() {
    case "$NOBAR" in
        true) cat "$1" ;;
        *) pv "$1" ;;
    esac | cmp_$COMPRESSION_TYPE $COMPRESSION_LEVEL | encrypt
}
# Decompress (and decrypt, when enabled) a file in place: writes the file
# name minus ".<compression ext><crypt ext>" and removes the input on success.
# $1 = file (the only argument this function reads)
decompress() {
    if decompress_stdout "$1" > "${1%.$(compression_extension)$(crypt_extension)}" ; then
        rm "$1"
    else
        # Hand the failure status of decompress_stdout back to the caller
        return $?
    fi
}
# Stream a stored file through decrypt and then the configured decompressor,
# writing the original content to stdout.
# $1 = file (the only argument this function reads)
# The file is read with pv unless NOBAR=true, in which case plain cat is used.
decompress_stdout() {
    case "$NOBAR" in
        true) cat "$1" ;;
        *) pv "$1" ;;
    esac | decrypt | dcmp_$COMPRESSION_TYPE $COMPRESSION_LEVEL
}
# Re-compress every stored file (numbered diffs, then latest) with another
# compression type.
# $1 = dir , $2 = current compression type (not read by the body) ,
# $3 = new compression type , $4 = new level (defaults to -6)
# Each file is decompressed with the current settings and piped into the new
# compressor; the result is written to a dot-prefixed temp file next to the
# original and only then moved into place. The original modification time is
# carried over to the new file.
# NOTE(review): the file list is read from $STOREDIR while the paths are built
# from $1 — both are expected to name the same directory.
recompress_all() {
# asks for the password first when encryption is enabled
import_crypt
{ file_difflist "$STOREDIR" && file_latest "$STOREDIR"; } | while read -r ln ; do
# name without the current compression/encryption suffix
basefile=${ln%.$(compression_extension)$(crypt_extension)}
outfile=$1/$basefile.$(compression_extension "$3")$(crypt_extension)
tmpoutfile=$1/.$basefile.$(compression_extension "$3")$(crypt_extension)
echo "recompressing '${basefile%.$DIFF_ENGINE}'"
# the variable prefixes switch the compressor for this one call only;
# on failure the loop stops before the original is touched
decompress_stdout "$1/$ln" |
NOBAR=true COMPRESSION_TYPE=$3 COMPRESSION_LEVEL=${4--6} compress_stdout /dev/stdin > "$tmpoutfile" || return $?
# remember the mtime (epoch seconds) before the original is removed
mts=$(stat -c "%Y" "$1/$ln")
rm "$1/$ln"
mv "$tmpoutfile" "$outfile"
touch -m -d "@$mts" "$outfile"
done
}
## CMP/DCMP functions
# cmp_<type> / dcmp_<type>: compress / decompress stdin to stdout.
# Arguments are passed through to the tool (callers pass the level).

# gzip compression; uses pigz instead when it is available.
# "command -v" is the POSIX way to probe for a command; the external "which"
# is not guaranteed to be installed.
cmp_gzip() {
    if command -v pigz >/dev/null 2>&1; then
        pigz "$@"
    else
        gzip "$@"
    fi
}
# gzip decompression; all arguments are passed through to gzip.
dcmp_gzip() {
gzip -d "$@"
}
# xz compression; uses pixz instead when it is available.
# "command -v" is the POSIX way to probe for a command; the external "which"
# is not guaranteed to be installed.
cmp_xz() {
    if command -v pixz >/dev/null 2>&1; then
        pixz "$@"
    else
        xz "$@"
    fi
}
# xz decompression; all arguments are passed through to xz.
dcmp_xz() {
xz -d "$@"
}
# zstd compression with -T0; all arguments are passed through to zstd.
cmp_zstd() {
zstd -T0 "$@"
}
# zstd decompression with -T0; all arguments are passed through to zstd.
dcmp_zstd() {
zstd -dT0 "$@"
}

129
src/config.sh Normal file
View file

@ -0,0 +1,129 @@
#!/bin/sh
# Print the NAME=value lines of the shell's `set` output for every variable
# named in $CONFIGVARS that is currently set.
# import_config uses this to stash the caller's settings before sourcing the
# store config.
export_env() {
# one "(NAME)" per line, built from the right-hand sides of $CONFIGVARS
varlist=$(echo "$CONFIGVARS" | cut -d: -f2- | tr -s ',\n' '\n' | sed '/^$/d;s/^/(/g;s/$/)/g')
# joined into the alternation ^((A)|(B)|...)= ; no match is not an error
set | grep -E "^($(printf "%s" "$varlist" | tr '\n' '|'))=" || true
}
# Name of the per-store configuration file (kept inside the store directory).
configfile=.diffstoreconfig
# Configuration table, one "setting:VAR[,VAR...]" entry per line: the setting
# name used on the command line and the shell variable(s) that hold its value.
CONFIGVARS='
engine:DIFF_ENGINE
retention:RETENTION_TYPE,RETENTION_AMOUNT
compression:COMPRESSION_TYPE,COMPRESSION_LEVEL
encryption:ENCRYPTION
'
# Assign values to a comma-separated list of config variables, export them
# and rewrite the store configuration file.
# $1 = dir , $2 = vars (e.g. "RETENTION_TYPE,RETENTION_AMOUNT") , $@ = values
# Values are matched to variables by position; giving fewer values than
# variables leaves the remaining variables untouched.
# Returns 1, without changing anything, when more values than variables are
# given.
config_set() {
    local dir=$1 vars=$2
    shift 2
    # The trailing comma makes cut yield an empty field past the last name;
    # without a delimiter it would return the whole string for any field
    # number, so a surplus value would silently overwrite the variable.
    if [ $# -gt 0 ] && [ -z "$(echo "$vars," | cut -d, -f$#)" ] ; then
        echo "Too many values for '$vars'" >&2
        return 1
    fi
    I=1
    for val
    do
        varname=$(echo "$vars," | cut -d, -f$I)
        export "$varname=$val"
        I=$((I+1))
    done
    gen_config > "$dir/$configfile"
}
# Print the comma-separated variable list of a setting from $CONFIGVARS.
# $1 = setting name (e.g. "retention")
get_config_var() {
    printf "%s\n" "$CONFIGVARS" | grep -o "$1:[A-Za-z_,]*" | sed 's/^[^:]*://'
}
# Entry point of the "config" operation.
# $1 = dir, $2 = conf op, $@ = values
# Without a conf op the current configuration is printed and the script exits.
# Setters (compression, engine, retention, encryption) first bring the stored
# files in line with the new value where needed, then write the config:
#   compression  re-compresses the store when type or level changes
#   engine       refused (returns 1) while the store holds diff files
#   encryption   re-encrypts the store when the on/off state changes
config() {
    local level
    if [ $# -lt 2 ] ; then
        gen_config_human
        exit $?
    fi
    dir=$1
    confop=$2
    shift 2
    case "$confop" in
        compression|engine|retention|encryption)
            case "$confop" in
                compression)
                    if [ $# -lt 1 ] ; then
                        echo "Missing compression type" >&2
                        return 1
                    fi
                    # COMPRESSION_LEVEL is handed to the compressor as-is, so
                    # it has to be stored in flag form ("-6"). Accept both
                    # "6" and "-6"; a bare number would otherwise be taken
                    # for a file name by the compressor.
                    level=${2:--6}
                    case "$level" in
                        -*) ;;
                        *) level=-$level ;;
                    esac
                    set -- "$1" "$level"
                    if [ "$COMPRESSION_TYPE$COMPRESSION_LEVEL" != "$1$2" ] ; then
                        recompress_all "$dir" "$COMPRESSION_TYPE" "$@"
                    fi
                    ;;
                engine)
                    # Look at the store itself, not at the current directory
                    if [ -n "$(file_difflist "$dir")" ] ; then
                        file_difflist "$dir"
                        echo "Cannot change $confop" >&2
                        return 1
                    fi
                    ;;
                encryption)
                    if crypt_different_state "$ENCRYPTION" "$@" ; then
                        reencrypt_all "$dir" "$COMPRESSION_TYPE" "$@"
                    fi
                    ;;
            esac
            config_set "$dir" "$(get_config_var "$confop")" "$@"
            ;;
        env) gen_config ;;
        print) gen_config_human ;;
        help) config_help ;;
        *) usage && exit 1;;
    esac
}
# Load the configuration file of a store, if it has one.
# $1 = folder
# Config variables that were already set before the call keep their value:
# they are stashed with export_env, the config file is sourced, and the stash
# is sourced back on top of it.
import_config() {
    if [ -f "$1/$configfile" ] ; then
        tmpenv=$(TMPDIR=${TMPDIR-/tmp} _lxsh_random_tmpfile diffstore-).env
        # save the settings that are currently in effect
        export_env > "$tmpenv"
        # read the store configuration
        . "$1/$configfile"
        # re-apply the saved settings
        . "$tmpenv"
        rm -f "$tmpenv"
    fi
}
# Fill in default values for the configuration variables.
# COMPRESSION_TYPE: zstd when installed, else gzip (only when empty)
# DIFF_ENGINE:      rdiff when installed, else diff (only when empty)
# RETENTION_TYPE, RETENTION_AMOUNT, COMPRESSION_LEVEL: only when unset
# ENCRYPTION: always reset to off
set_defaults() {
    if [ -z "$COMPRESSION_TYPE" ] ; then
        if command -v zstd >/dev/null 2>&1 ; then
            COMPRESSION_TYPE=zstd
        else
            # Must be the type name ("gzip"), not its extension ("gz"):
            # the name selects cmp_gzip/dcmp_gzip and the $comptypes entry.
            COMPRESSION_TYPE=gzip
        fi
    fi
    if [ -z "$DIFF_ENGINE" ] ; then
        if command -v rdiff >/dev/null 2>&1 ; then
            DIFF_ENGINE=rdiff
        else
            DIFF_ENGINE=diff
        fi
    fi
    RETENTION_TYPE=${RETENTION_TYPE-all}
    RETENTION_AMOUNT=${RETENTION_AMOUNT-14}
    COMPRESSION_LEVEL=${COMPRESSION_LEVEL--6}
    ENCRYPTION=off
}
# Print the current configuration as NAME=value lines, one per variable
# listed in $CONFIGVARS (the format written to the store config file).
gen_config() {
    for I in $(echo "$CONFIGVARS" | sed 's/^[^:]*://' | tr ',' '\n') ; do
        printf '%s=%s\n' "$I" "${!I}"
    done
}
# Print the current configuration in readable form: one line per setting of
# $CONFIGVARS, the setting name padded to 12 columns, a colon, then the
# value of each of its variables.
gen_config_human() {
    for oneconf in $CONFIGVARS ; do
        confname=${oneconf%%:*}
        vars=${oneconf#*:}
        printf '%-12s:' "$confname"
        for var in $(echo "$vars" | tr ',' '\n') ; do
            printf ' %s' "${!var}"
        done
        echo
    done
}
# Write a configuration file holding the default settings into a store.
# $1 = dir
gen_default_config() {
    set_defaults
    gen_config > "${1}/${configfile}"
}

83
src/crypt.sh Normal file
View file

@ -0,0 +1,83 @@
#!/bin/sh
# INT trap handler used by console_prompt_hidden: turns terminal echo back on.
_stop() {
stty echo
}
# Succeed when two encryption settings differ in their on/off state.
# $1 = current state , $2 = requested state
# Only the exact value "on" counts as enabled; anything else is "off".
crypt_different_state() {
    case "$1" in
        on) [ "$2" != on ] ;;
        *) [ "$2" = on ] ;;
    esac
}
# Print the file name suffix of encrypted files (".enc") when encryption is
# on, nothing otherwise.
# $1 = encryption state to use instead of $ENCRYPTION (optional)
crypt_extension() {
    local ENCRYPTION=${1-$ENCRYPTION}
    case "$ENCRYPTION" in
        on) echo ".enc" ;;
    esac
}
# Read one line from the terminal with echo turned off and print it to stdout.
# $1 = prompt
# Runs in a subshell, so the trap and the local variable do not leak into the
# caller. Echo is turned back on after the read, when the read fails, and by
# the INT trap handler.
console_prompt_hidden()
{
(
trap _stop INT
local prompt
stty -echo
# read -p is a bash extension
read -rp "$1" prompt || { stty echo; return 1; }
stty echo
# the newline typed by the user was not echoed: print one on stderr
printf "\n" >&2
echo "$prompt"
)
}
# Make sure a password is available when encryption is on: prompts for it
# unless $CRYPT_PW is already set, then exports CRYPT_PW.
# Does nothing when encryption is off.
import_crypt() {
    [ "$ENCRYPTION" = on ] || return 0
    [ -z "$CRYPT_PW" ] || return 0
    CRYPT_PW=$(console_prompt_hidden "Password: ")
    export CRYPT_PW
}
# Encrypt stdin to stdout with AES-256-CBC (PBKDF2 key derivation) when
# encryption is on; plain pass-through otherwise.
# $1 = key (defaults to $CRYPT_PW, the password set up by import_crypt)
# The callers in this project pass no key, so without the default every file
# would be encrypted with an empty password.
encrypt() {
    if [ "$ENCRYPTION" = on ] ; then
        openssl enc -aes-256-cbc -pbkdf2 -salt -in - -out - -k "${1-$CRYPT_PW}"
    else
        cat
    fi
}
# Decrypt stdin to stdout (AES-256-CBC, PBKDF2 key derivation) when
# encryption is on; plain pass-through otherwise.
# $1 = key (defaults to $CRYPT_PW, the password set up by import_crypt)
# The callers in this project pass no key, so without the default the
# password entered by the user would never be used.
decrypt() {
    if [ "$ENCRYPTION" = on ] ; then
        openssl enc -d -aes-256-cbc -pbkdf2 -in - -out - -k "${1-$CRYPT_PW}"
    else
        cat
    fi
}
# Rewrite every stored file (numbered diffs, then latest) for a new
# encryption state.
# $1 = dir , $2 = compression type (not read by the body) ,
# $3 = new encryption state (on/off)
# Each file is decoded with the current settings and re-encoded with
# ENCRYPTION=$3 into a dot-prefixed temp file, which then replaces the
# original. The modification time is carried over, as recompress_all does:
# it is what day-based retention and the listed store date rely on.
reencrypt_all() {
    # A password is needed in both directions, but only if there is
    # something to convert.
    if [ -n "$(file_difflist "$STOREDIR")$(file_latest "$STOREDIR")" ] ; then
        ENCRYPTION=on import_crypt
    fi
    { file_difflist "$STOREDIR" && file_latest "$STOREDIR"; } | while read -r ln ; do
        # name without the current encryption suffix
        basefile=${ln%$(crypt_extension)}
        outfile=$1/$basefile$(crypt_extension "$3")
        tmpoutfile=$1/.$basefile$(crypt_extension "$3")
        adj=Encrypting
        if [ "$ENCRYPTION" = on ] ; then
            adj=Decrypting
        fi
        echo "$adj '${basefile%.$DIFF_ENGINE.$(compression_extension)}'"
        # on failure stop before the original is touched
        decompress_stdout "$1/$ln" |
            ENCRYPTION=$3 compress_stdout /dev/stdin > "$tmpoutfile" || return $?
        # remember the mtime (epoch seconds) before the original is removed
        mts=$(stat -c "%Y" "$1/$ln")
        rm "$1/$ln"
        mv "$tmpoutfile" "$outfile"
        touch -m -d "@$mts" "$outfile"
    done
}

72
src/diff.sh Normal file
View file

@ -0,0 +1,72 @@
#!/bin/sh
### TODO: bsdiff ###
## with bsdiff
# bsdiff older newer patch.bin
# [...]
# bspatch older newer patch.bin
## upside:
# - big compress
## downside:
# - bonkers amount of RAM
## FCT diff_ENGINE ##
# $1 = old file , $2 = new file , $3 = output diff file

# Write a unified diff (all files treated as text) of $1 -> $2 into $3.
# diff exits with 1 when the files differ, which is the normal case here and
# is reported as success. Exit status 2 and above means diff itself failed
# (e.g. unreadable input); that is reported as failure so the caller does not
# go on with a bogus patch file.
diff_diff() {
    diff -ua "$1" "$2" > "$3" || [ $? -eq 1 ]
}
# Write an rdiff delta of $1 -> $2 into $3.
# $1 = old file , $2 = new file , $3 = output diff file
# The signature of $1 goes to a temporary file that is removed in every case.
# Returns 1 when either rdiff step fails. The steps are chained with &&
# because "set -e" has no effect inside a command list followed by "||", so
# the delta step would otherwise still run after a failed signature step.
diff_rdiff() {
    sigfile=$(_lxsh_random_tmpfile diffstore-).sig
    if rdiff signature "$1" "$sigfile" && rdiff delta "$sigfile" "$2" "$3" ; then
        rm -rf "$sigfile"
    else
        rm -rf "$sigfile"
        return 1
    fi
}
## FCT patch_ENGINE ##
# $1 = dir , $2 = file , $3 = output , stdin = patches

# Rebuild an older version with the "diff" engine: copies $2 to $3, then
# feeds the decompressed content of every patch named on stdin, in order,
# to a single patch run on $3.
patch_diff() {
    dir=$1 infile=$2 outfile=$3
    shift 3
    cp "$infile" "$outfile"
    while read -r filediff ; do
        decompress_stdout "${dir}/${filediff}.${DIFF_ENGINE}.$(compression_extension)$(crypt_extension)"
    done | patch -s "$outfile"
}
# Rebuild an older version with the "rdiff" engine.
# $1 = dir , $2 = file , $3 = output , $4 = number of patches , stdin = patches
# Every patch named on stdin is applied in turn: the result of one step
# replaces $2 and becomes the input of the next; the final result is moved
# to $3. Temporary files are removed on success and on failure, and the exit
# status of the subshell is returned on failure.
patch_rdiff() {
# scratch output file, created in the current directory
tmpfile=.diffstoretmp-$(_lxsh_random_string)
# fifo that carries one line per applied patch to pv
tmppipe=$(TMPDIR=${XDG_RUNTIME_DIR-/tmp} _lxsh_random_tmpfile "diffstorefifo_")
(
dir=$1
infile=$2
realout=$3
shift 3
outfile=$tmpfile
mkfifo "$tmppipe"
# after the shift $1 is the patch count, handed to pv -s
# NOTE(review): presumably pv counts the lines written below to draw the
# progress display — confirm against the pv manual
pv -petls "$1" >/dev/null < "$tmppipe" &
while read -r filediff ; do
decompress_stdout "$dir/$filediff.$DIFF_ENGINE.$(compression_extension)$(crypt_extension)" | rdiff patch "$infile" /dev/stdin "$outfile"
# the patched result becomes the input of the next step
rm "$infile"
mv "$outfile" "$infile"
# one line per patch, written to the fifo
echo "dummy line"
done > "$tmppipe"
mv "$infile" "$realout"
rm -f "$tmpfile" "$tmppipe"
) || {
stat=$?
rm -f "$tmpfile" "$tmppipe"
return $stat
}
}

42
src/help.sh Normal file
View file

@ -0,0 +1,42 @@
#!/bin/sh
# Print the general usage text to stdout.
# The here-document is unquoted, so $(basename "$0") expands to the name the
# script was called with.
usage() {
cat << EOF
$(basename "$0") <FOLDER> <OPERATION> [ARGS]
Script to store files as differential history.
Intended purpose is to serve as a heavily compressed backup history.
Note that input files need to be unencrypted for it to be effective.
Operations:
list List stored files
add <FILE> [NAME] Add given file into diff store
store <FILE> [NAME] Alias to add
get <FILE> Get original from selected file
clean [TYPE] [AMOUNT] Clean according to retention. Can give a retention type in console
config <OPERATION> Set config on folder. See '$(basename "$0") config help' for details
It is recommended to install 'zstd' and 'rdiff' for optimal speed and compression.
They will be set as default config if found, otherwise will default to gzip/diff.
EOF
}
# Print the help text of the "config" operation to stdout.
# The here-document is unquoted, so $(basename "$0") expands to the name the
# script was called with.
# The text names the identifiers the code actually uses: compression type
# "gzip" (the key of the compression table) and the CRYPT_PW environment
# variable (the one import_crypt reads).
config_help() {
cat << EOF
$(basename "$0") <FOLDER> config [OPERATION]
View or change configuration of FOLDER
Operations:
help Display this message
print Print config in human readable format
env Print config as environment
engine <TYPE> Engine to use for differentials. Supported: diff, rdiff
retention <TYPE> <AMOUNT> Configure automatic retention.
compression <TYPE> [LEVEL] Configure to use given compression. Supported types: gzip, xz, zstd
encryption <on/off> Enable or disable password-based encryption.
> Use env CRYPT_PW for automated password input
Retention types:
all Keep everything
number Keep N number of files
days Keep only files modified in the last N days
EOF
}

22
src/list.sh Normal file
View file

@ -0,0 +1,22 @@
# Entry point of the "list" operation.
# $1 = dir , $2 = op ("basic" for bare names, anything else for the table)
list() {
    lop=$2
    if [ "$lop" = basic ] ; then
        basic_list "$1"
    else
        default_list "$1"
    fi
}
# Print the short name of every stored file, one per line: numbered diffs
# first, then the latest file.
# $1 = dir
basic_list() {
    (
        file_difflist "$1" && file_latest "$1"
    ) | filename_pipe
}
# Print the stored files as a table: short name, store date (modification
# time of the stored file) and size on disk.
# $1 = dir
# The name column is as wide as the longest short name.
default_list() {
    maxlength=$( ( file_difflist "$1" && file_latest "$1" ) | filename_pipe | wc -L)
    printf '%-*s %-36s %s\n' "$maxlength" "File" "Store date" "Disk space"
    ( file_difflist "$1" && file_latest "$1" ) | while read -r ln
    do
        printf ' %-*s %-36s %s\n' \
            "$maxlength" \
            "$(filename "$ln")" \
            "$(date '+%F %T %z' -d "@$(stat --printf=%Y "$1/$ln")")" \
            "$(du -hs "$1/$ln" | cut -f1)"
    done
}

35
src/main.sh Executable file
View file

@ -0,0 +1,35 @@
#!/usr/bin/env lxsh
# Command line entry point: diffstore <FOLDER> <OPERATION> [ARGS]

# DEBUG=true traces every command
if [ "$DEBUG" = true ] ; then
    set -x
fi
set -e

%include *.sh

# Folder and operation are both mandatory
[ $# -ge 2 ] || {
    usage
    exit 1
}

STOREDIR=$1
OP=$2
shift 2

# A folder without a config file is initialised on first use, whatever the
# operation is
if [ "$OP" = init ] || ! [ -f "$STOREDIR/$configfile" ] ; then
    init_store "$STOREDIR"
fi
import_config "$STOREDIR"

case "$OP" in
    init)
        true
        ;;
    ls|list)
        list "$STOREDIR" "$@"
        ;;
    store|add)
        # retention is applied after every successful add
        add_file "$STOREDIR" "$@" && clean "$STOREDIR"
        ;;
    timeset)
        timeset_file "$STOREDIR" "$@"
        ;;
    get)
        get_file "$STOREDIR" "$@"
        ;;
    clean)
        LOG=true clean "$STOREDIR" "$@"
        ;;
    config)
        config "$STOREDIR" "$@"
        ;;
    *)
        usage && exit 1
        ;;
esac

140
src/store.sh Normal file
View file

@ -0,0 +1,140 @@
#!/bin/sh
# Create the store directory if needed and write a default configuration
# into it. Files already present in the directory are left alone.
# $1 = dir
# The former 'rm -rf "$1/*"' line is gone on purpose: with the glob inside the
# quotes it never matched anything except a file literally named "*", so
# stores were never emptied. Making it actually expand would wipe any
# directory the tool is pointed at, because the caller runs init_store for
# every folder that has no config file yet.
init_store() {
    mkdir -p "$1"
    gen_default_config "$1"
}
# Print the names of the numbered diff files ("<digits>-<name>...") found
# directly in a store, one per line, in ascending numeric order.
# $1 = dir
# Fails without listing anything when the directory cannot be entered. The
# check is explicit because callers use this function in "&&" lists, where
# "set -e" is ignored and a failed cd would otherwise list the current
# directory instead.
file_difflist() {
    (
        cd "$1" || exit 1
        find . -mindepth 1 -maxdepth 1 -regex '\./[0-9]*-.*' -printf "%f\n" | sort -n
    )
}
# Print the name of the "latest-*" file(s) found directly in a store.
# $1 = dir
# Fails without listing anything when the directory cannot be entered. The
# check is explicit because callers use this function in "&&" lists, where
# "set -e" is ignored and a failed cd would otherwise search the current
# directory instead.
file_latest() {
    (
        cd "$1" || exit 1
        find . -mindepth 1 -maxdepth 1 -name 'latest-*' -printf "%f\n" 2>/dev/null
    )
}
# Print the name a stored file was added under: the compression/encryption
# suffix, the diff engine suffix and the leading "<number>-" or "latest-"
# prefix are all removed.
# $1 = file
original_filename() {
    name=${1%.$(compression_extension)$(crypt_extension)}
    name=${name%.$DIFF_ENGINE}
    # everything after the first dash
    printf '%s\n' "${name#*-}"
}
# Print the short name of a stored file: the compression/encryption suffix
# and the diff engine suffix are removed, the prefix is kept.
# $1 = file
filename() {
    name=$1
    name=${name%.$(compression_extension)$(crypt_extension)}
    name=${name%.$DIFF_ENGINE}
    printf '%s\n' "$name"
}
# Filter version of filename: reads stored file names on stdin and prints
# them without the encryption, compression and diff engine suffixes.
# Takes no arguments.
filename_pipe() {
    sed -e "s/$(crypt_extension)$//g" \
        -e "s/\.$(compression_extension)$//g" \
        -e "s/\.$DIFF_ENGINE$//g"
}
# Print the number of the highest-numbered diff file of a store (the part
# before the first dash of the last entry of file_difflist).
# Prints an empty line... nothing at all when the store has no diff files.
# $1 = dir
getnvalue() {
    file_difflist "$1" | sed -n '$p' | sed 's/-.*//'
}
# Print the name of the next diff file of a store:
# "<highest number + 1>-<name>.<diff engine>" (numbering starts at 1).
# $1 = dir , $2 = input file name
new_difffile() {
    local last
    last=$(getnvalue "$1")
    printf '%s\n' "$((last+1))-$2.$DIFF_ENGINE"
}
# Print the name under which the newest version of a file is stored:
# "latest-<base name>.<compression ext><crypt ext>".
# $1 = dir (not read by the body) , $2 = input file
get_latestname() {
    printf 'latest-%s.%s%s\n' "$(basename "$2")" "$(compression_extension)" "$(crypt_extension)"
}
# Add a new version of a file to the store.
# $1 = dir , $2 = file , $3 = name (defaults to the base name of the file)
# Directories are packed into a temporary tar archive first and stored under
# "<name>.tar". When the store already holds a latest version, a diff that
# turns the new content back into that previous version is written and
# compressed and the previous latest file is removed; the new content is then
# stored compressed as the latest file.
# Returns 1, before touching the store, when the input does not exist.
add_file() {
    # A missing input must be caught up front: further down the previous
    # latest version is removed, and nothing could replace it.
    # "-" is let through for callers that feed the content on stdin.
    if [ "$2" != - ] && [ ! -e "$2" ] ; then
        echo "file '$2' not found" >&2
        return 1
    fi
    import_crypt
    dir=$1
    file=$2
    # Only the base name is used inside the store: a directory part or a
    # trailing slash would otherwise end up in the tar temp path and in
    # "<name>.tar".
    name=$(basename "${3-$2}")
    tarfile=
    latest=$(file_latest "$dir")
    newlatest=$dir/$(get_latestname "$dir" "$name")

    if [ -d "$file" ] ; then
        newlatest=$dir/$(get_latestname "$dir" "$name.tar")
        tarfile="$dir/.$name.tar"
        tar -cf "$tarfile" "$file"
        file=$tarfile
    fi

    if [ -n "$latest" ] ; then
        fname=$(original_filename "$latest")
        destfile=$(new_difffile "$dir" "$fname")
        echo "> Creating diff"
        # the diff goes from the new content to the previous latest version
        decompress_stdout "$dir/$latest" | diff_$DIFF_ENGINE "$file" /dev/stdin "$dir/$destfile"
        echo "> Compressing diff file"
        compress "$dir/$destfile"
        rm "$dir/$latest"
    fi

    echo "> Compressing new file"
    compress_stdout "$file" > "$newlatest"
    if [ -n "$tarfile" ] ; then
        rm "$tarfile"
    fi
}
# Turn a shorthand ("latest" or a number) into the short name of the stored
# file it designates; any other name is printed unchanged.
# $1 = dir , $2 = name
# The shorthand is resolved by expanding "<name>-*" inside the store; when
# that does not yield exactly one existing file the name is printed as given.
resolve_shortname() {
    local filetest
    case "$2" in
        latest|[0-9]*)
            (
                cd "$1"
                filetest=$(printf "%s" "$2"-*)
                if [ ! -f "$filetest" ] ; then
                    printf '%s\n' "$2"
                else
                    printf '%s\n' "$filetest" | filename_pipe
                fi
            )
            ;;
        *)
            printf '%s\n' "$2"
            ;;
    esac
}
# Restore one version from the store.
# $1 = dir , $2 = name or shorthand (see resolve_shortname) ,
# $3 = destination file (defaults to the resolved short name)
# The latest version is simply decompressed. An older version is rebuilt by
# decompressing the latest one into ".<dest>" and applying the stored diffs
# with patch_$DIFF_ENGINE, starting with the newest diff and ending with the
# requested one.
# Returns 1 when the requested name is not among the stored diffs.
get_file() {
import_crypt
dir=$1
file=$(resolve_shortname "$dir" "$2")
dest=${3-$file}
latest=$(file_latest "$dir" | filename_pipe)
if [ "$file" = "$latest" ] ; then
echo "> Decompressing"
decompress_stdout "$dir/$(file_latest "$dir")" > "$dest"
else
list=$(file_difflist "$dir" | filename_pipe)
# keep the requested entry and every newer one (-A with the total line
# count covers the rest of the list), then reverse the order with tac
# NOTE(review): "$file" is used as a regular expression here
list=$(printf "%s\n" "$list" | grep "^$file$" -A$(echo "$list" | wc -l) | tac)
if [ -z "$list" ]; then
echo "file $file not found" >&2
return 1
fi
echo "> Decompressing"
# NOTE(review): ".$dest" assumes a destination without a directory part
decompress_stdout "$dir/$(file_latest "$dir")" > ".$dest"
(
# errors are handled by hand from here on, so that the temporary file is
# removed whatever the outcome of the patching
set +e
echo "> Patching"
# last argument: number of diffs to apply
printf "%s\n" "$list" | NOBAR=true patch_$DIFF_ENGINE "$dir" ".$dest" "$dest" "$(printf "%s\n" "$list" | wc -l)"
stat=$?
rm -f ".$dest"
exit $stat
)
fi
}