Last active
August 24, 2023 20:29
-
-
Save karolba/fb4559fa34786a630fed3a1392af2a12 to your computer and use it in GitHub Desktop.
A very quick-and-dirty strace to json process tree parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh
# A quick-and-dirty converter from strace output files to a process tree.
#
# Generate strace.* files like that:
#   strace -o strace -D -qqq --no-abbrev -y -Y -s 16384 -ff -ttt --seccomp-bpf -e "trace=fork,vfork,clone,fchdir,chdir,/exec*" -e signal=none [...]
# then run this script from the directory that contains them.

# The code below relies on sh-style semantics: unquoted expansions are word
# split and array subscripts start at 0.
emulate sh

# Speed up grep and other similar tools
export LC_ALL=C

# Suffix of the strace.* file that sorts first in version order; it is used as
# the root of the printed tree and is assumed to have started in $PWD.
first=$(printf '%s\n' strace.* | sort --version-sort | head -n1 | sed 's/^strace\.//')
if [ ! -e "strace.$first" ]; then
    # The glob did not match anything, so there is nothing to parse.
    echo "No strace.* files found in $PWD" >&2
    exit 1
fi

# child pid -> parent pid
declare -A proc_parent=()
# parent pid -> space-separated list of child pids
declare -A proc_children=()
# pid -> sorted envp lines of the most recent execve (inherited on fork)
declare -A proc_last_known_envp=()
# pid -> current working directory as last reported by chdir
declare -A proc_directory=(
    ["$first"]="$PWD"
)
# pid -> newline-separated JSON objects, one per successful execve
declare -A proc_execve_calls_json_lines=()
# pid -> number of lines stored in proc_execve_calls_json_lines
declare -A proc_execve_calls_json_line_count=()
# Print the number of arguments received. Called with an unquoted,
# space-separated list to count its words.
count() { printf '%s\n' "$#"; }
# The author found the busybox implementations faster for this script than the
# GNU ones, so route grep and comm through busybox whenever it is installed.
if command -v busybox >/dev/null 2>&1; then
    grep() { busybox grep "$@"; }
    comm() { busybox comm "$@"; }
fi
# sort_envp TEXT
# Print every double-quoted string found in TEXT (backslash escapes inside the
# quotes are honoured), one per line, in sorted order. The output is later
# compared with comm, so the order must be plain lexical.
sort_envp() {
    # bash:
    #printf '%s\n' "$1" | grep -Eo '"(\\.|[^"\\])*"' | sort -S 10%
    # faster version for zsh: (f) splits the grep output on newlines and (o)
    # sorts lexically, matching what the sort-based variant above produces.
    printf '%s\n' "${(o@f)$(printf '%s\n' "$1" | grep -Eo '"(\\.|[^"\\])*"')}"
}
# json_lines_to_json_array LINES
# Print LINES with every newline replaced by a comma and no trailing newline,
# i.e. the inside of a JSON array whose elements were given one per line.
json_lines_to_json_array() {
    printf '%s' "${1//$'\n'/,}"
}
# Replay every traced event in timestamp order. The three pipelines at the
# bottom normalise the strace lines to
#   <time> <type> <pid> <data>
# where <type> is fork, chdir or execve and <data> is respectively the child
# pid, the new directory, or the raw execve(...) text.
while read -r _time type process data; do
    case "$type" in
        fork)
            parent=$process
            child=$data

            # A child starts out with its parent's directory and environment.
            proc_parent[$child]="$parent"
            proc_directory[$child]="${proc_directory[$parent]}"
            proc_last_known_envp[$child]="${proc_last_known_envp[$parent]}"
            proc_children[$parent]="${proc_children[$parent]} $child"
            ;;
        chdir)
            new_dir=$data
            proc_directory[$process]="$new_dir"
            ;;
        execve)
            execve_call=$data

            # for bash:
            #prog=$(printf '%s\n' "$execve_call" | grep -Eo '"(\\.|[^"\\])*"' | head -n1)
            #mapfile -t < <(printf '%s\n' "$execve_call" | grep -Eo '[[](,| |"(\\.|[^"\\])*")*[]]')
            #argv=${MAPFILE[0]}
            #envp=$(sort_envp "${MAPFILE[1]}")
            # for zsh: one grep yields three matches, in order: the quoted
            # program path, the [argv] list and the [envp] list.
            prog_argv_and_envp=("${(@f)$(printf '%s\n' "$execve_call" | grep -Eo '[[](,| |"(\\.|[^"\\])*")*[]]|"(\\.|[^"\\])*"')}")
            prog=${prog_argv_and_envp[0]}
            argv=${prog_argv_and_envp[1]}
            envp=$(sort_envp "${prog_argv_and_envp[2]}")

            # Only the difference to the previously known environment of this
            # process is recorded.
            parent_envp=${proc_last_known_envp[$process]}
            proc_last_known_envp[$process]="$envp"

            if [[ "$envp" == "$parent_envp" ]]; then
                envp_new=
                envp_deleted=
            else
                envp_new=$(comm -13 <(printf '%s\n' "$parent_envp") <(printf '%s\n' "$envp"))
                envp_deleted=$(comm -23 <(printf '%s\n' "$parent_envp") <(printf '%s\n' "$envp"))
            fi

            directory=${proc_directory[$process]}

            # The optional parts expand to nothing when there is no difference.
            envp_new_part=${envp_new:+"$(printf ', "envp-new": [%s]' "$(json_lines_to_json_array "$envp_new")")"}
            envp_deleted_part=${envp_deleted:+"$(printf ', "envp-deleted": [%s]' "$(json_lines_to_json_array "$envp_deleted")")"}

            json=$(printf '{"prog": %s, "argv": %s, "dir": "%s" %s %s}\n' \
                "$prog" "$argv" "$directory" \
                "$envp_new_part" "$envp_deleted_part")

            # Append to the newline-separated list of execve objects of this
            # process (the first entry must not be preceded by an empty line).
            proc_execve_calls_json_lines[$process]=$(printf '%s\n' ${proc_execve_calls_json_lines[$process]+"${proc_execve_calls_json_lines[$process]}"} "$json")
            (( proc_execve_calls_json_line_count[$process]++ ))
            ;;
        *)
            echo "Unknown type in $_time $type $process $data" >&2
            exit 1
            ;;
    esac
# grep -H forces the "strace.<pid>:" prefix even when only one file exists;
# the pid is extracted from that prefix. Each stream is sorted by timestamp
# and the three are merged.
# fork:   "<time> fork <pid> <child pid>"  (the <comm> decoration after the returned pid is dropped)
# chdir:  "<time> chdir <pid> <dir>"       (successful chdir("dir") and fchdir(fd<dir>) only)
# execve: "<time> execve <pid> <raw call>" (successful exec* calls only)
done < <(sort -m -k 1 \
    <(grep -HE '^[0-9.]* (clone|fork|vfork)' strace.* | awk '$2~/^(clone|fork|vfork)/' | sed -r 's/([0-9])(<[^>]*>)/\1/' | awk -F '[: ]' '{sub("^strace\\.","");} {print $2, "fork", $1, $NF}' | sort -k 1) \
    <(grep -HE '^[0-9.]* f?chdir' strace.* | awk '{sub("^strace\\.","");sub(":", " ")} $3~/^f?chdir/ && $NF == 0' | sed 's/ fchdir([0-9]*</ chdir /;s/>) = 0$//;s/("/ /;s/") = 0$//' | awk 'function set123(a,b,c){$1=a;$2=b;$3=c}{set123($2, $3, $1); print}' | sort -k 1) \
    <(grep -HE '^[0-9.]* exec' strace.* | awk '$2~/^exec/ && / = 0$/{sub("^strace\\.",""); print}' | sed 's/:/ /' | awk '{$1=($2 " execve " $1); $2=""; print}' | sort -k 1)
)
# Status returned by visit when the visited subtree printed nothing.
declare -r unneccessary_node_return=5

# visit PID
# Recursively print the subtree rooted at PID as a Python dict literal followed
# by a comma. Processes that neither exec'd nor have a descendant that did are
# left out, and a process without execves and with exactly one child is
# replaced by that child.
# Globals:  proc_children, proc_execve_calls_json_lines,
#           proc_execve_calls_json_line_count (all read)
# Outputs:  the literal on stdout
# Returns:  0 when something was printed, $unneccessary_node_return otherwise
visit() {
    local pid=$1
    local execve_count=$((proc_execve_calls_json_line_count[$pid]))
    local how_many_children
    how_many_children=$(count ${proc_children[$pid]})

    # Leaf that never exec'd: nothing worth showing.
    if ((how_many_children == 0 && execve_count == 0)); then
        return $unneccessary_node_return
    fi

    # Pure pass-through process: show its only child in its place and hand
    # that child's status to the caller.
    if ((how_many_children == 1 && execve_count == 0)); then
        visit ${proc_children[$pid]}
        return $?
    fi

    # Render all children in a subshell to capture their output; the subshell
    # exits 0 as soon as at least one child produced something.
    local children
    children=$(
        ret=$unneccessary_node_return
        for child in ${proc_children[$pid]}; do
            visit $child
            if [[ $? == 0 ]]; then
                ret=0
            fi
        done
        exit $ret
    )
    if ((execve_count == 0 && $? == unneccessary_node_return)); then
        return $unneccessary_node_return
    fi

    if ((execve_count > 0)) || [[ $children ]]; then
        printf '{\n'
        case "$execve_count" in
            0) ;;
            # A single execve is merged into the node itself via dict unpacking.
            1) printf '**%s,\n' "${proc_execve_calls_json_lines[$pid]}" ;;
            *) printf '"execves": [%s],\n' "$(json_lines_to_json_array "${proc_execve_calls_json_lines[$pid]}")" ;;
        esac
        if [[ $children ]]; then
            printf '"children": [%s],\n' "$children"
        fi
        printf '},\n'
    else
        return $unneccessary_node_return
    fi
}
# Render the tree. visit prints a Python dict literal, so the first python3
# evaluates it and dumps it as JSON; the second python3 post-processes that
# JSON and the result is handed to `yq -P` (presumably to pretty-print it as
# YAML - confirm against the installed yq).
{
    echo 'import json'
    echo 'def j(data):'
    echo '    print(json.dumps(data, indent=" "))'
    echo 'j('
    visit "$first"
    echo ')'
} | python3 | python3 -c '
import json

def envp_walk(name, node):
    # Turn the list of "VAR=value" strings stored under `name` into a dict,
    # here and in every nested execve and child node.
    if name in node and isinstance(node[name], list):
        new_envp_new = {}
        for e in node[name]:
            # partition tolerates an entry without "=" (value becomes "").
            varname, _, varvalue = e.partition("=")
            new_envp_new[varname] = varvalue
        node[name] = new_envp_new
    if "execves" in node:
        node["execves"] = [envp_walk(name, child) for child in node["execves"]]
    if "children" in node:
        node["children"] = [envp_walk(name, child) for child in node["children"]]
    return node

def reduce_walk(node):
    # Replace every child that consists of nothing but a "children" list by
    # those grandchildren.
    if "children" in node:
        new_children = []
        for child in node["children"]:
            if len(child.keys()) == 1 and "children" in child:
                new_children.extend(reduce_walk(grandchild) for grandchild in child["children"])
            else:
                new_children.append(reduce_walk(child))
        node["children"] = new_children
    return node

with open("/dev/stdin", "r") as file:
    tree = json.load(file)

tree = envp_walk("envp-new", tree)
tree = envp_walk("envp-deleted", tree)

# Keep reducing until a pass no longer changes the serialised tree.
previous_json_dump = ""
while True:
    tree = reduce_walk(tree)
    json_dump = json.dumps(tree, indent=2)
    if json_dump == previous_json_dump:
        print(json_dump)
        break
    previous_json_dump = json_dump' | yq -P
# vim: sw=4 ts=4 sts=4 et
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment