Skip to content

Instantly share code, notes, and snippets.

@donnaken15
Created August 24, 2025 12:17
Show Gist options
  • Select an option

  • Save donnaken15/e4b0a20965203287f03a34ebff3100a8 to your computer and use it in GitHub Desktop.

Select an option

Save donnaken15/e4b0a20965203287f03a34ebff3100a8 to your computer and use it in GitHub Desktop.
archive neutralizer (7z/rar/zip/tar/... => lrzip) | WIP
#!/bin/zsh -e
# NewtraLZ / new tar.l(r)z
# Shell options in effect: -e comes from the shebang line above (so it only
# applies when the file is executed directly, not via `zsh <file>`), and
# pipefail is switched on below so a pipeline reports a failing stage.
set -o pipefail
# Uncomment for an execution trace while debugging.
#set -x
{ # ------------ deeeefines
# KSH_ARRAYS DOESN'T ALLOW EXPANSION WITHOUT [@],
# AND (generally) CAN'T DEFINE ALIASES WITHIN BLOCKS, WTFFFFFFFFFFF
# Shared constants:
#   nul   - sink used for discarded output
#   bools - the two boolean words used by the option flags
#   LRZIP - exported so that lrzip child processes actually see it; per the
#           lrzip manual, LRZIP=NOCONFIG makes lrzip skip its lrzip.conf
#   this  - path this script was invoked as (used in the usage text)
#   best  - name of the target compressor
nul='/dev/null'; bools=(true false); export LRZIP=NOCONFIG; this="$0"; best=lrzip

# die: print the usage text (tabs expanded to 4 columns) and exit with status 1.
die() {
	expand -t 4 <<!!
${this:t:r} - archive neutralizer
usage:
${this:t:r} [input file] [switches]
options:
--output,-o - set output name
--transform,-f - replace the original file directly when done,
output parameter becomes the temp file, useful
for retaining the creation date
--sanity-chk,-v - verify checksums of source files vs recompressed files
--batch-size,-b - set number of items to process at a time
--max-memory,-m - limit RAM usage by hundreds of MB, for lrzip only
? [arg] - pass argument to $best, for optargs, you may need
to use this twice, as such: (? --window-size ? 2048)
!!
	exit 1
}
# s CMD [ARG...]: run CMD with stdout and stderr sent to $nul and return CMD's
# exit status. Arguments are forwarded verbatim ("$@"), so empty arguments
# survive. With no arguments it is a no-op that returns 0.
# NOTE: CMD executes inside this function, so builtins that act on the
# positional parameters (e.g. `shift`) affect s's own arguments, not the
# caller's.
s() {
	if [ $# -eq 0 ]; then
		return 0
	fi
	"$@" >"$nul" 2>"$nul"
}
# strip_system32 PATHSTRING: print PATHSTRING with every Windows System32
# entry removed. Matches /cygdrive/c, /mnt/c or c: prefixes, either slash
# direction, case-insensitively. The pattern requires a ':' after the entry,
# so a System32 entry in last position is left alone.
# printf is used instead of echo because zsh's echo interprets backslash
# escapes, which would corrupt C:\... style entries.
strip_system32() {
	printf '%s\n' "$1" |
		sed -e 's/\(\/\(cygdrive\|mnt\)\/c\|c:\)[\\/]windows[\\/]system32[\\/]\?://gI'
}
PATH=$(strip_system32 "$PATH")
# rip PATTERN [FILE] [GROUP]: grep-and-extract. For every line of FILE
# (default "-", i.e. stdin) matching the sed basic regex PATTERN, print
# capture group GROUP (default 1). Carriage returns are removed first.
# PATTERN is spliced into the sed program, so it must not contain a bare "/".
rip() {
	sed -n "s/\r//g;/$1/s/$1/\\${3:-1}/p" -- "${2:--}"
}

# hate: filter stdin, removing carriage returns and then dropping empty lines.
# CRs are stripped first so that a blank CRLF line ("\r") is dropped as well.
hate() {
	sed 's/\r//g;/^$/d'
}
# can_run NAME: succeed (status 0) when NAME resolves to something runnable.
# Uses the portable `command -v` lookup; all output is discarded into $nul.
can_run() {
	command -v "$1" >"$nul" 2>"$nul"
}

# sort_dirs: filter stdin through hate, then order the lines by their reversed
# text (rev | sort | rev), i.e. sorted by line endings rather than beginnings.
sort_dirs() {
	hate | rev | sort | rev
}
# Disabled free-memory guard: the leading `false &&` means this group never runs.
# If enabled it would read the MemFree figure from /proc/meminfo via rip,
# divide it by 1000000.0, subtract 4.5, apply `>>0`, and abort with
# 'not enough memory' when the result is <= 1.
# NOTE(review): the divisor presumably converts kB to GB and 4.5 is a reserve
# — confirm before re-enabling.
false && {
free="$(((($(rip "^MemFree: \+\([0-9]\+\).*" /proc/meminfo)/1000000.0)-4.5)>>0))"
[ $free -le 1 ] && {
echo 'not enough memory'
exit 1
}
}
}
# it's actually getting hard to read these or keep track of where stuff is, so here i am actually marking stuff |:(
{ # ------------ input check
	# The first positional argument is the archive to convert; it is
	# mandatory, and the usage text is shown (via die) when it is absent.
	inp="$1"
	if [ -z "$inp" ]; then
		echo "input not specified"
		die
	fi
	# Drop the input path so that only switches remain in "$@".
	# This must be a direct `shift`: wrapping it in a helper function would
	# shift that function's own arguments and leave the script's untouched.
	# $# is at least 1 here because "$1" was non-empty, so it cannot fail.
	shift
}
{ # ------------ arg checking
	# Words collected for the target compressor ($best).
	passthru=()
	# Long-option emulation on top of getopts, see
	# https://stackoverflow.com/questions/12022592
	declare -A longopts=(
		[output]=-o
		[transform]=-f
		[sanity-chk]=-v
		[batch-size]=-b
		[max-memory]=-m
	)
	# Defaults: batch size, memory limit, verify flag, transform flag.
	bs=4
	mm=10
	vf=false
	tf=false

	# normalize_args ARG...: rewrite the raw switches into $normargs.
	#   - "? WORD" pairs are removed and WORD is appended to $passthru
	#   - "--long" names are replaced by their short form from $longopts
	#   - everything else is copied through unchanged
	# Exits 1 (message on stderr) on an unknown long option or a trailing "?";
	# letting an unknown "--name" through would have getopts read it as a
	# bundle of short flags.
	normalize_args() {
		local word name short
		normargs=()
		while [ $# -gt 0 ]; do
			word="$1"
			shift
			case "$word" in
			'?')
				if [ $# -eq 0 ]; then
					echo "missing argument after ?" >&2
					exit 1
				fi
				passthru+=("$1")
				shift
				;;
			--*)
				name="${word#--}"
				short=
				if [ -n "$name" ]; then
					short="${longopts[$name]}"
				fi
				if [ -z "$short" ]; then
					echo "nonexistent option $word" >&2
					exit 1
				fi
				normargs+=("$short")
				;;
			*)
				normargs+=("$word")
				;;
			esac
		done
	}

	# parse_opts ARG...: getopts pass over already-normalized switches.
	# Sets out / mm / bs / tf / vf. The optstring includes "v" so that
	# -v / --sanity-chk really reaches its case arm.
	parse_opts() {
		local opt OPTIND=1
		while getopts ':o:b:m:fv' opt; do
			case "$opt" in
			o) out="$OPTARG" ;;
			m) mm="$OPTARG" ;;
			b) bs="$OPTARG" ;;
			f) tf=true ;;
			v) vf=true ;;
			:)
				echo "Missing argument for -$OPTARG" >&2
				exit 1
				;;
			# NOTE(review): this stores the bare option letter (no leading
			# dash) — confirm that is what $best should receive.
			\?) passthru+=("$OPTARG") ;;
			*) echo "Uncaught: $opt, $OPTARG" ;;
			esac
		done
	}

	normalize_args "$@"
	# Keep the script's own positional parameters in their normalized form.
	set -- "${normargs[@]}"
	parse_opts "$@"

	# Default output name: the input name minus any ".tar.*" tail and minus
	# the known archive extensions, with ".tar.lrz" appended.
	if [ -z "$out" ]; then
		out="${inp%%.tar.*}"
		for ext in 7z zip rar tar jar tgz ark7z arkxz arkzp zp zpaq; do
			out="${out%.$ext}"
		done
		out="${out}.tar.lrz"
	fi
}
{ # ------------ rooootines
# Private scratch directory for extracted files. Abort right away if it
# cannot be created, even when the script runs without -e.
tmpd="$(mktemp -d -t "nwtXXX")" || exit 1

# ktmp: remove the scratch directory. Registered as the EXIT trap, so it runs
# on every exit path. Does nothing when $tmpd is empty.
ktmp() {
	if [ -n "$tmpd" ]; then
		rm -rf -- "$tmpd"
	fi
}
trap ktmp EXIT
# piperes [STATUS...]: return the bitwise OR of a set of exit statuses.
# With no arguments it folds zsh's $pipestatus from the pipeline that ran
# just before the call; with arguments it folds those values instead.
# $pipestatus is copied by the very first command because any later command
# would overwrite it. All working variables are local.
piperes() {
	local s=(${pipestatus}) x=0 c
	if [ $# -gt 0 ]; then
		s=("$@")
	fi
	for c in "${s[@]}"; do
		x=$((x | c))
	done
	return $x
}
}
{ # ------------ detect type
# sniff_mime FILE: print the MIME type of FILE.
# When the file(1) description starts with "ZPAQ " or "LRZIP " a fixed MIME
# string is printed for it; every other file gets whatever
# `file -b --mime-type` reports.
sniff_mime() {
	case "$(file -b "$1")" in
	'ZPAQ '*) echo 'application/x-zpaq' ;;
	'LRZIP '*) echo 'application/x-lrzip' ;;
	*) file -b --mime-type "$1" ;;
	esac
}
# $test holds the detected MIME type with the "application/" prefix removed.
test="$(sniff_mime "$inp")"
test="${test#application/}"
# MIME subtype (the part after "application/") -> two-letter archive kind.
# Both spellings of the gzip and zstd subtypes are listed: the x- prefixed
# ones are believed to be what older file(1) releases report.
declare -A arktypes=(
	[x-bzip2]=bz
	[x-lrzip]=lr
	[x-lzma]=xz
	[x-xz]=xz
	[gzip]=gz
	[x-gzip]=gz
	[zstd]=zs
	[x-zstd]=zs
	[zip]=pk
	[x-tar]=tr
	[x-zpaq]=zp
	[x-7z-compressed]=7z
)
# $type is empty for an unrecognized subtype; the fallback key only avoids an
# empty subscript when $test itself is empty.
type="${arktypes[${test:-unknown}]}"
# Locate the external tool needed to list the detected archive kind.
# Sets $z7 (7z), $zp (zpaq) or $tar (everything reaching the default arm).
case "$type" in
# 7z: regq KEY VALUE prints the data of registry VALUE under KEY. It takes
# line 3 of `reg.exe query` output, strips the "name REG_TYPE" prefix, turns
# literal \0 into newlines and backslashes into slashes, drops CRs, and
# returns the combined pipeline status via piperes.
7z) regq() {
reg.exe query "$1" /v "$2" | sed -n "3s/ \{4\}.* \{4\}REG_[^ ]\+ \{4\}//;s/\\\0/\n/g;s/\\\/\//g;s/\r//g;3p" # line 3 carries the value
piperes
}
# Use 7z.exe from the 7-Zip registry Path when reg.exe is runnable and the
# query succeeds; otherwise take the first of 7z / 7za / 7zr that can run.
can_run reg.exe && z7="$(regq 'HKCU\SOFTWARE\7-Zip' Path)/7z.exe" || {
z7=; for x in 7z 7za 7zr; do
can_run $x && { z7=$x; break }
done
}
[ -z "$z7" ] && { echo "7-zip not found"; exit 1 }
;;
# zpaq: first runnable candidate wins.
zp) zp=; for x in zpaq zpaqfranz zp; do # open question from the author: how to handle different versions
can_run $x && { zp=$x; break }
done
[ -z "$zp" ] && { echo "zpaq not found"; exit 1 }
;;
# zip: without unzip the archive is treated as kind "tr"; either way control
# falls through (;&) into the default arm so that $tar gets set.
pk) can_run unzip || type=tr
;&
# default: resolve cmd.exe from COMSPEC (or the stock System32 path). When it
# exists, parse the build number out of `ver`; for builds >= 17063 use the
# tar.exe that sits next to cmd.exe.
*) cmd="`realpath "${COMSPEC:-C:\\Windows\\System32\\cmd.exe}"`"
[ -f "$cmd" ] && {
winver=`"$cmd" /c ver 2>$nul | rip "^Microsoft Windows \[Version 1[01]\.[0-9]\.\([0-9]\+\)\.[0-9]\+\]$"`
[ ${winver:-0} -ge 17063 ] && wintar="$(dirname "$cmd")/tar.exe"
}
# make diagnostic tool offhanded? that just checks the build no. and if tar exists in system32
# on an installation around or a version behind minimum, for whatever reason
# $tar is the Windows tar when one was found above, plain `tar` otherwise.
[ -n "$wintar" ] && tar="$wintar" || tar=tar
;;
esac
# Archive kind -> stream decompressor program. Only kinds that are a single
# compressed stream appear here; for any other kind $exe ends up empty.
# Open questions carried over from the author: whether a bzcat alias exists
# everywhere bzip2 does, and lzma/lzmadec/unlzma possibly being aliases of xz
# (xz reportedly handles both the xz and lzma formats).
typeset -A balls
balls=(
	[xz]=xz
	[zs]=zstd
	[gz]=gzip
	[bz]=bzip2
	[lr]=lrzip
)
exe="${balls[$type]}"
# ------------ archive member listing
# list_members: write the member paths of "$inp" to stdout, one per line.
# Reads $exe, $type, $tar, $z7, $zp, $mm, $test and $inp.
# Returns non-zero when the required tool is missing, the archive kind is
# unknown, or the listing command itself fails.
list_members() {
	if [ -n "$exe" ]; then
		if ! can_run "$exe"; then
			echo "decompressor $exe not found, exiting" >&2
			return 1
		fi
		# Single-stream compressors: decompress to stdout and let tar list it.
		# "$tar" is used rather than "$wintar", which is only set when a
		# Windows tar.exe was found.
		case "$exe" in
		lrzip) "$exe" -dqo - -m "$mm" "$inp" ;;
		*) "$exe" -qcd "$inp" ;;
		esac | "$tar" -tf -
	else
		case "$type" in
		tr) "$tar" -tf "$inp" ;;
		7z)
			# the name column starts at character 54 of the -ba listing
			"$z7" l "$inp" -ba -sccUTF-8 | cut -c54- | sed 's/\r//g'
			;;
		pk) unzip -Z1 "$inp" ;;
		zp)
			"$zp" l "$inp" 2>$nul | rip "-\? [0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\} [0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\} \+[0-9\.]\+ [A-Z]\+ \+\(.*\)"
			;;
		*)
			echo "why $test $type" >&2
			return 1
			;;
		esac
	fi
}

# Capture the sorted listing through a command substitution so the pipeline's
# exit status (pipefail is on) can be checked directly, before anything else
# runs and overwrites it.
listing="$(list_members | sort_dirs)" || exit $?

# One array element per non-empty line.
filelist=()
while IFS= read -r member; do
	if [ -n "$member" ]; then
		filelist+=("$member")
	fi
done <<<"$listing"

echo "total files: ${#filelist[@]}"
if [ "${#filelist[@]}" -eq 0 ]; then
	echo 'no files available' >&2
	exit 1
fi
# TODO: batch parameter, extract N files at once and then add for sake of dictionary reading stuff
# zipiz_x ARCHIVE [MEMBER...]: run unzip on ARCHIVE for the given members with
# "$tmpd" as the -d target directory. Fails without calling unzip when no
# ARCHIVE argument is given. The archive name is kept in a local variable so
# that it does not overwrite a global $file.
zipiz_x() {
	local file="$1"
	shift && unzip "$file" "$@" -x -d "$tmpd"
}
}
# unzip: zip
# unrar: rar
# system32/[bsd]tar: rar, tar, zip, 7z (NO LZMA WHY!!)
# 7z[ar]?: rar, tar, zip, 7z
# zpaq: zpaq :l
# <Commands>
# a : Add files to archive
# b : Benchmark
# d : Delete files from archive
# e : Extract files from archive (without using directory names)
# h : Calculate hash values for files
# i : Show information about supported formats
# l : List contents of archive
# rn : Rename files in archive
# t : Test integrity of archive
# u : Update files to archive
# x : eXtract files with full paths
# <Switches>
# -- : Stop switches and @listfile parsing
# -ai[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : Include archives
# -ax[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : eXclude archives
# -ao{a|s|t|u} : set Overwrite mode
# -an : disable archive_name field
# -bb[0-3] : set output log level
# -bd : disable progress indicator
# -bs{o|e|p}{0|1|2} : set output stream for output/error/progress line
# -bt : show execution time statistics
# -i[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : Include filenames
# -m{Parameters} : set compression Method
# -mmt[N] : set number of CPU threads
# -mx[N] : set compression level: -mx1 (fastest) ... -mx9 (ultra)
# -o{Directory} : set Output directory
# -p{Password} : set Password
# -r[-|0] : Recurse subdirectories for name search
# -sa{a|e|s} : set Archive name mode
# -scc{UTF-8|WIN|DOS} : set charset for console input/output
# -scs{UTF-8|UTF-16LE|UTF-16BE|WIN|DOS|{id}} : set charset for list files
# -scrc[CRC32|CRC64|SHA1|SHA256|*] : set hash function for x, e, h commands
# -sdel : delete files after compression
# -seml[.] : send archive by email
# -sfx[{name}] : Create SFX archive
# -si[{name}] : read data from stdin
# -slp : set Large Pages mode
# -slt : show technical information for l (List) command
# -snh : store hard links as links
# -snl : store symbolic links as links
# -sni : store NT security information
# -sns[-] : store NTFS alternate streams
# -so : write data to stdout
# -spd : disable wildcard matching for file names
# -spe : eliminate duplication of root folder for extract command
# -spf[2] : use fully qualified file paths
# -ssc[-] : set sensitive case mode
# -sse : stop archive creating, if it can't open some input file
# -ssp : do not change Last Access Time of source files while archiving
# -ssw : compress shared files
# -stl : set archive timestamp from the most recently modified file
# -stm{HexMask} : set CPU thread affinity mask (hexadecimal number)
# -stx{Type} : exclude archive type
# -t{Type} : Set type of archive
# -u[-][p#][q#][r#][x#][y#][z#][!newArchiveName] : Update options
# -v{Size}[b|k|m|g] : Create volumes
# -w[{path}] : assign Work directory. Empty path means a temporary directory
# -x[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : eXclude filenames
# -y : assume Yes on all queries
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment