Created
August 24, 2025 12:17
-
-
Save donnaken15/e4b0a20965203287f03a34ebff3100a8 to your computer and use it in GitHub Desktop.
archive neutralizer (7z/rar/zip/tar/... => lrzip) | WIP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/zsh -e | |
| # NewtraLZ / new tar.l(r)z | |
| # errexit comes from the "-e" on the shebang line, so it is only in effect when the | |
| # script is executed directly rather than handed to zsh as an argument. | |
| # pipefail: a pipeline reports failure when any of its stages fails. | |
| set -o pipefail | |
| #set -x | |
| { # ------------ deeeefines | |
| # KSH_ARRAYS DOESN'T ALLOW EXPANSION WITHOUT [@], | |
| # AND (generally) CAN'T DEFINE ALIASES WITHIN BLOCKS, WTFFFFFFFFFFF | |
| # Shared settings: | |
| # nul - sink for discarded output | |
| # bools - the two boolean words as an array | |
| # this - path the script was invoked as (shown in the usage text) | |
| # best - name of the target compressor (shown in the usage text) | |
| nul='/dev/null' | |
| bools=(true false) | |
| # lrzip looks for LRZIP=NOCONFIG in its environment, so the variable has to be | |
| # exported; a plain assignment stays private to this shell. | |
| export LRZIP=NOCONFIG | |
| this="$0" | |
| best=lrzip | |
| # die: print the usage text (tabs expanded to 4 columns) and exit with status 1. | |
| die() { | |
| expand -t 4 <<!! | |
| ${this:t:r} - archive neutralizer | |
| usage: | |
| ${this:t:r} [input file] [switches] | |
| options: | |
| --output,-o - set output name | |
| --transform,-f - replace the original file directly when done, | |
| output parameter becomes the temp file, useful | |
| for retaining the creation date | |
| --sanity-chk,-v - verify checksums of source files vs recompressed files | |
| --batch-size,-b - set number of items to process at a time | |
| --max-memory,-m - limit RAM usage by hundreds of MB, for lrzip only | |
| ? [arg] - pass argument to $best, for optargs, you may need | |
| to use this twice, as such: (? --window-size ? 2048) | |
| !! | |
| exit 1 | |
| } | |
| # s CMD [ARG...]: run CMD with stdout and stderr discarded and return its status. | |
| # "$@" keeps every argument as its own word, including empty ones (unquoted $* drops them). | |
| # CMD runs inside this function, so builtins that act on the positional parameters | |
| # (shift, set --) only touch the function's own arguments, never the script's. | |
| s() { "$@" >$nul 2>$nul } # silence | |
| # Drop the Windows system32 directory from PATH: matches /cygdrive/c, /mnt/c or c: | |
| # followed by windows and system32 (either slash style, any letter case), but only | |
| # where the entry is followed by a ':'. | |
| PATH=$(echo "$PATH" | sed -e 's/\(\/\(cygdrive\|mnt\)\/c\|c:\)[\\/]windows[\\/]system32[\\/]\?://gI') # i need to die | |
| # grep and extract group 1 | |
| # rip PATTERN [FILE] [GROUP]: on every line that matches the sed pattern PATTERN, replace | |
| # the match with capture group GROUP (default 1) and print the line; other lines are | |
| # dropped. Reads FILE, or "-" when FILE is omitted. Carriage returns are removed first. | |
| # PATTERN is pasted into the sed script as-is, so it must not contain an unescaped "/". | |
| # hate: filter that deletes empty lines and removes carriage returns from the rest. | |
| rip() { sed -n "s/\r//g;/$1/s/$1/\\${3:-1}/p" -- "${2:--}" }; hate() { sed '/^$/d;s/\r//g' } | |
| # can_run NAME: succeed when `which` resolves NAME; all of its output is discarded. | |
| # sort_dirs: filter that passes stdin through hate, then orders the lines by their | |
| # reversed text (rev | sort | rev), i.e. lines are grouped by how they end. | |
| can_run() { which "$1" >$nul 2>$nul }; sort_dirs() { hate | rev | sort | rev } # awful | |
| # Disabled by the leading "false &&". If enabled it would take the MemFree figure from | |
| # /proc/meminfo, divide it by 1000000.0, subtract 4.5, and exit with 'not enough memory' | |
| # when the result is 1 or less. | |
| false && { | |
| free="$(((($(rip "^MemFree: \+\([0-9]\+\).*" /proc/meminfo)/1000000.0)-4.5)>>0))" | |
| [ $free -le 1 ] && { | |
| echo 'not enough memory' | |
| exit 1 | |
| } | |
| } | |
| } | |
| # it's actually getting hard to read these or keep track of where stuff is, so here i am actually marking stuff |:( | |
| { # ------------ input check | |
| # The first positional argument is the archive to convert; the rest are switches. | |
| inp="$1" | |
| [ -z "$inp" ] && { | |
| echo "input not specified" | |
| die | |
| } | |
| # Shift directly, not through s(): a shift executed inside a function only moves that | |
| # function's own arguments, which would leave the input file at the front of "$@" | |
| # where it stops getopts before a single switch is parsed. | |
| shift || die | |
| } | |
| { # ------------ arg checking | |
| # Words handed on to the compressor ("? arg" pairs and unknown short options). | |
| passthru=() | |
| # why is FOSS so bad https://stackoverflow.com/questions/12022592 | |
| # getopts only parses short options, so each long name is translated to its short form first. | |
| declare -A longopts=( | |
| [output]=-o | |
| [transform]=-f | |
| [sanity-chk]=-v | |
| [batch-size]=-b | |
| [max-memory]=-m | |
| ) | |
| # Defaults: batch size, lrzip memory limit (passed to lrzip -m), verify flag, transform flag. | |
| bs=4 | |
| mm=10 | |
| vf=false | |
| tf=false | |
| # true while the word that follows a "?" still has to be skipped | |
| why=false | |
| # Rewrite "$@" in place: every pass removes the word at the front and, unless the word | |
| # belongs to a "? arg" pair, appends its (possibly translated) form at the back. | |
| for o in "$@"; do | |
| shift | |
| # the previous word was "?", so this one is already stored in passthru | |
| $why && { | |
| why=false | |
| continue | |
| } | |
| [ "$o" = "?" ] && { | |
| passthru+=("$1") | |
| why=true | |
| # do not put the "?" marker back: a bare word would end getopts parsing early | |
| continue | |
| } | |
| [[ "$o" =~ ^--(.*) ]] && { | |
| lo="${longopts[${match[1]}]}" | |
| [ -z "$lo" ] && echo "nonexistent option $o" || o="$lo" | |
| } | |
| set -- "$@" "${longopts["$o"]:-"$o"}" | |
| done | |
| unset why | |
| #(IFS=":"; echo ":${}:") | |
| # "v" has to be listed here, otherwise -v / --sanity-chk never reaches its case arm | |
| # and is treated as an unknown option instead. | |
| while getopts ':o:b:m:fv' opt; do | |
| case $opt in | |
| o) out="$OPTARG" ;; | |
| m) mm="$OPTARG" ;; | |
| b) bs="$OPTARG" ;; | |
| f) tf=true ;; | |
| v) vf=true ;; | |
| :) echo "Missing argument for -$OPTARG"; exit 1 ;; | |
| \?) passthru+=("$OPTARG") ;; | |
| *) echo "Uncaught: $opt, $OPTARG" ;; | |
| esac | |
| done | |
| # No -o given: derive the output name from the input by removing a ".tar.*" tail | |
| # and any of the known archive extensions, then adding ".tar.lrz". | |
| [ -z "$out" ] && { | |
| out="${inp%%.tar.*}" | |
| for ext in 7z zip rar tar jar tgz ark7z arkxz arkzp zp zpaq; do | |
| out="${out%.$ext}" | |
| done | |
| out="${out}.tar.lrz" | |
| } | |
| } | |
| { # ------------ rooootines | |
| # Scratch directory for this run; ktmp removes it again whenever the shell exits. | |
| tmpd="$(mktemp -d -t "nwtXXX")" | |
| ktmp() { | |
| rm -rf "$tmpd" | |
| } | |
| trap ktmp EXIT | |
| # piperes: return the bitwise OR of all exit statuses found in $pipestatus on entry, | |
| # i.e. non-zero when any stage of the pipeline that ran just before the call failed. | |
| # Call it as the very next command after that pipeline. | |
| piperes() { | |
| local s=(${pipestatus}) x=0 i | |
| # $(( )) assignment instead of an arithmetic command: (( expr )) returns status 1 when | |
| # the value is 0, which aborts the script under errexit exactly when every stage succeeded. | |
| for ((i=1; i<=${#s[@]}; i++)); do x=$(( x | s[i] )); done | |
| return $x | |
| } | |
| } | |
| { # ------------ detect type | |
| # notepad++ seriously needs syntax nesting for substitutions | |
| # Identify the archive. When the `file -b` description starts with "ZPAQ " or "LRZIP " | |
| # the MIME-style name is filled in by hand; anything else is reported by | |
| # `file -b --mime-type`. | |
| test="$( | |
| if [[ "$(file -b "$inp")" =~ ^(ZPAQ|LRZIP)\ ]]; then | |
| case "${match[1]}" in | |
| (ZPAQ) echo 'application/x-zpaq' ;; | |
| (LRZIP) echo 'application/x-lrzip' ;; | |
| (*) echo 'you suck' 1>&2; exit 1;; | |
| esac | |
| else | |
| file -b --mime-type "$inp" | |
| fi | |
| )" | |
| # keep only the part after "application/" | |
| test="${test#application/}" | |
| # MIME subtype (as left in $test) -> two-letter archive kind, written as key/value pairs. | |
| typeset -A arktypes | |
| arktypes=( | |
| x-tar tr | |
| zip pk | |
| x-7z-compressed 7z | |
| x-zpaq zp | |
| x-lrzip lr | |
| x-bzip2 bz | |
| gzip gz | |
| zstd zs | |
| x-xz xz | |
| x-lzma xz | |
| ) | |
| # stays empty when the subtype is not in the table | |
| type="${arktypes[$test]}" | |
| # Pick the external program needed for this archive kind. | |
| case "$type" in | |
| # 7z: regq KEY VALUE queries the registry through reg.exe and returns the OR of the | |
| # pipeline statuses via piperes. With reg.exe available, 7z.exe is taken from the Path | |
| # value under HKCU\SOFTWARE\7-Zip; if reg.exe is missing or that assignment fails, the | |
| # first of 7z, 7za, 7zr that can_run finds is used. | |
| 7z) regq() { | |
| reg.exe query "$1" /v "$2" | sed -n "3s/ \{4\}.* \{4\}REG_[^ ]\+ \{4\}//;s/\\\0/\n/g;s/\\\/\//g;s/\r//g;3p" # line 3 only: strip name/type columns, \0 -> newline, \ -> /, drop CR | |
| piperes | |
| } | |
| can_run reg.exe && z7="$(regq 'HKCU\SOFTWARE\7-Zip' Path)/7z.exe" || { | |
| z7=; for x in 7z 7za 7zr; do | |
| can_run $x && { z7=$x; break } | |
| done | |
| } | |
| [ -z "$z7" ] && { echo "7-zip not found"; exit 1 } | |
| ;; | |
| # zp: first of zpaq, zpaqfranz, zp that can_run finds. | |
| zp) zp=; for x in zpaq zpaqfranz zp; do # how TF do i handle versions | |
| can_run $x && { zp=$x; break } | |
| done | |
| [ -z "$zp" ] && { echo "zpaq not found"; exit 1 } | |
| ;; | |
| # pk: without unzip the archive is treated as kind tr; ";&" then continues into the | |
| # default arm below so that $tar gets set. | |
| pk) can_run unzip || type=tr | |
| ;& | |
| # default: when the command interpreter named by COMSPEC (or the stock cmd.exe path) | |
| # exists and reports a Windows build of at least 17063, use the tar.exe that sits next | |
| # to it; otherwise use plain "tar". | |
| *) cmd="`realpath "${COMSPEC:-C:\\Windows\\System32\\cmd.exe}"`" | |
| [ -f "$cmd" ] && { | |
| winver=`"$cmd" /c ver 2>$nul | rip "^Microsoft Windows \[Version 1[01]\.[0-9]\.\([0-9]\+\)\.[0-9]\+\]$"` | |
| [ ${winver:-0} -ge 17063 ] && wintar="$(dirname "$cmd")/tar.exe" | |
| } | |
| # make diagnostic tool offhanded? that just checks the build no. and if tar exists in system32 | |
| # on an installation around or a version behind minimum, for whatever reason | |
| [ -n "$wintar" ] && tar="$wintar" || tar=tar | |
| ;; | |
| esac | |
| # Archive kind -> decompressor command, written as key/value pairs. | |
| # bz: open question whether the bzcat alias exists on every machine/env that has bzip2. | |
| # xz: lzma/lzmadec/unlzma might be aliases; xz and lzma are apparently distinct types, | |
| # but xz can handle both of them. | |
| typeset -A balls | |
| balls=( | |
| lr lrzip | |
| bz bzip2 | |
| gz gzip | |
| zs zstd | |
| xz xz | |
| ) | |
| # empty for kinds that have no entry in the table | |
| exe="${balls[$type]}" | |
| { # ------------ build the member list | |
| # this pipe would be even worse if the result weren't stored in an array; kept as a | |
| # pipe it'd be less accessible and more difficult to work with my batch idea | |
| # Stage 1 prints one member path per line: when a decompressor is set ($exe), its | |
| # output is listed by tar; otherwise the archive is listed by the tool for its kind. | |
| if [ -n "$exe" ]; then | |
| can_run "$exe" || { | |
| echo "decompressor $exe not found, exiting" >&2 | |
| exit 1 | |
| } | |
| # The stream is listed with $tar, not $wintar: wintar is only filled in when a | |
| # Windows tar.exe was detected, while tar is set for every kind that reaches this branch. | |
| case "$exe" in | |
| lrzip) "$exe" -dqo - -m $mm "$inp" ;; | |
| *) "$exe" -qcd "$inp" ;; | |
| esac | "$tar" -tf - | |
| else | |
| case "$type" in | |
| tr) "$tar" -tf "$inp" ;; | |
| 7z) # keeps everything from character 54 of each listing line (presumably the name column) | |
| "$z7" l "$inp" -ba -sccUTF-8 | cut -c54- | sed 's/\r//g' ;; | |
| pk) unzip -Z1 "$inp" | |
| ;; | |
| zp) "$zp" l "$inp" 2>$nul | rip "-\? [0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\} [0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\} \+[0-9\.]\+ [A-Z]\+ \+\(.*\)" # keep only the text after the date/size/attribute columns | |
| ;; | |
| *) echo "why $test $type" >&2 | |
| exit 1 | |
| ;; | |
| esac | |
| fi | |
| # Stage 2 appends a NUL after the sorted names so that read -d '' finds its delimiter | |
| # and returns 0. At a bare end-of-file read reports failure even though the array was | |
| # filled, and with pipefail that failure becomes the status of the whole pipeline. | |
| } | { sort_dirs && printf '\0' } | IFS=$'\n' read -r -d '' -A filelist | |
| # Take a copy of the stage statuses immediately: every later command, the echo | |
| # included, replaces $pipestatus. | |
| listing_status=("${pipestatus[@]}") | |
| echo "total files: ${#filelist}" | |
| # $(( )) assignment rather than (( )): an arithmetic command whose value is 0 returns | |
| # status 1, which would stop the script under errexit when both stages succeeded. | |
| test=$(( listing_status[1] | listing_status[2] )) | |
| [ $test -ne 0 ] && exit $test | |
| # An empty listing can show up as no element at all or as a single empty element. | |
| if (( ${#filelist} == 0 )) || [[ ${#filelist} -eq 1 && -z "${filelist[1]}" ]]; then | |
| echo 'no files available' >&2; exit 1 | |
| fi | |
| # TODO: batch parameter, extract N files at once and then add for sake of dictionary reading stuff | |
| # zipiz_x ARCHIVE [NAME...]: run unzip on ARCHIVE with the remaining arguments, followed | |
| # by -x -d "$tmpd". Stores ARCHIVE in the global variable $file. | |
| # NOTE(review): unzip reads the names after -x as exclusions, so "-x -d" may not do what | |
| # is intended here - confirm the flag order before relying on this helper. | |
| zipiz_x() { file="$1" && shift && unzip "$file" "$@" -x -d "$tmpd" } | |
| } | |
| # unzip: zip | |
| # unrar: rar | |
| # system32/[bsd]tar: rar, tar, zip, 7z (NO LZMA WHY!!) | |
| # 7z[ar]?: rar, tar, zip, 7z | |
| # zpaq: zpaq :l | |
| # <Commands> | |
| # a : Add files to archive | |
| # b : Benchmark | |
| # d : Delete files from archive | |
| # e : Extract files from archive (without using directory names) | |
| # h : Calculate hash values for files | |
| # i : Show information about supported formats | |
| # l : List contents of archive | |
| # rn : Rename files in archive | |
| # t : Test integrity of archive | |
| # u : Update files to archive | |
| # x : eXtract files with full paths | |
| # <Switches> | |
| # -- : Stop switches and @listfile parsing | |
| # -ai[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : Include archives | |
| # -ax[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : eXclude archives | |
| # -ao{a|s|t|u} : set Overwrite mode | |
| # -an : disable archive_name field | |
| # -bb[0-3] : set output log level | |
| # -bd : disable progress indicator | |
| # -bs{o|e|p}{0|1|2} : set output stream for output/error/progress line | |
| # -bt : show execution time statistics | |
| # -i[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : Include filenames | |
| # -m{Parameters} : set compression Method | |
| # -mmt[N] : set number of CPU threads | |
| # -mx[N] : set compression level: -mx1 (fastest) ... -mx9 (ultra) | |
| # -o{Directory} : set Output directory | |
| # -p{Password} : set Password | |
| # -r[-|0] : Recurse subdirectories for name search | |
| # -sa{a|e|s} : set Archive name mode | |
| # -scc{UTF-8|WIN|DOS} : set charset for console input/output | |
| # -scs{UTF-8|UTF-16LE|UTF-16BE|WIN|DOS|{id}} : set charset for list files | |
| # -scrc[CRC32|CRC64|SHA1|SHA256|*] : set hash function for x, e, h commands | |
| # -sdel : delete files after compression | |
| # -seml[.] : send archive by email | |
| # -sfx[{name}] : Create SFX archive | |
| # -si[{name}] : read data from stdin | |
| # -slp : set Large Pages mode | |
| # -slt : show technical information for l (List) command | |
| # -snh : store hard links as links | |
| # -snl : store symbolic links as links | |
| # -sni : store NT security information | |
| # -sns[-] : store NTFS alternate streams | |
| # -so : write data to stdout | |
| # -spd : disable wildcard matching for file names | |
| # -spe : eliminate duplication of root folder for extract command | |
| # -spf[2] : use fully qualified file paths | |
| # -ssc[-] : set sensitive case mode | |
| # -sse : stop archive creating, if it can't open some input file | |
| # -ssp : do not change Last Access Time of source files while archiving | |
| # -ssw : compress shared files | |
| # -stl : set archive timestamp from the most recently modified file | |
| # -stm{HexMask} : set CPU thread affinity mask (hexadecimal number) | |
| # -stx{Type} : exclude archive type | |
| # -t{Type} : Set type of archive | |
| # -u[-][p#][q#][r#][x#][y#][z#][!newArchiveName] : Update options | |
| # -v{Size}[b|k|m|g] : Create volumes | |
| # -w[{path}] : assign Work directory. Empty path means a temporary directory | |
| # -x[r[-|0]][m[-|2]][w[-]]{@listfile|!wildcard} : eXclude filenames | |
| # -y : assume Yes on all queries |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment