Skip to content

Instantly share code, notes, and snippets.

@tiborvass
Created March 21, 2026 19:04
Show Gist options
  • Select an option

  • Save tiborvass/86c92d3b9081d5af828c47fbfbbea70f to your computer and use it in GitHub Desktop.

Select an option

Save tiborvass/86c92d3b9081d5af828c47fbfbbea70f to your computer and use it in GitHub Desktop.
Internet connectivity-checking network failover script
#!/usr/bin/env bash
#
# Connectivity-checking network failover script.
#
# Each run probes the primary interface (gateway first, then external hosts)
# and keeps small counters in a state file. After enough consecutive failed
# runs it swaps NetworkManager route metrics so the backup connection is
# preferred; after enough consecutive healthy runs it swaps them back.
# NOTE(review): the persisted counters suggest this is meant to be invoked
# periodically (e.g. from a timer) -- confirm against the deployment.
#
# Environment:
#   DEBUG=1   print diagnostic lines in addition to state-transition events.
set -euo pipefail
# --- config ---
# Interface that is probed: used for `nmcli device show`, `ip route ... dev`
# and `ping -I`.
PRIMARY_DEV="eno1"
# NetworkManager connection profile names whose route metrics are modified
# and which are brought up again after each modification.
PRIMARY_CONN="Wired connection 1"
# Placeholder: must be replaced with the real backup profile name.
BACKUP_CONN="YOUR_WIFI_CONNECTION_NAME"
# Route metrics applied in normal mode (primary has the lower value).
PRIMARY_METRIC_NORMAL=100
BACKUP_METRIC_NORMAL=600
# Route metrics applied in failover mode (backup has the lower value).
PRIMARY_METRIC_FAILOVER=900
BACKUP_METRIC_FAILOVER=50
# Number of consecutive unhealthy runs (in normal mode) before entering
# failover.
FAIL_THRESHOLD=3
# Number of consecutive healthy runs (in failover mode) before leaving
# failover.
RECOVER_THRESHOLD=2
# Passed to ping as -c (packet count) and -W (timeout) for every probe.
PING_COUNT=1
PING_TIMEOUT=1
# External hosts probed after the gateway; one successful reply is enough.
EXT_PROBES=("1.1.1.1" "8.8.8.8")
# Location of the persisted counters / mode between runs.
STATE_DIR="/run/failover-check"
STATE_FILE="$STATE_DIR/state"
# Diagnostic output is enabled only when this is exactly "1".
DEBUG="${DEBUG:-0}"
# Tag passed to logger(1) for state-transition events.
SYSLOG_TAG="failover-check"
# --------------
# Make sure the state directory exists before any state function runs.
mkdir -p "$STATE_DIR"
#######################################
# Print a diagnostic line when debugging is enabled.
# Globals:   DEBUG (read) - output is produced only when it equals "1"
# Arguments: message words; joined with single spaces
# Outputs:   the message on stdout when enabled, nothing otherwise
# Returns:   0 when debugging is disabled; printf's status otherwise
#######################################
debug() {
  # An `if` is used instead of `[[ ... ]] && printf` on purpose: the short
  # form makes the function return 1 whenever DEBUG is off, and under
  # `set -e` that non-zero status aborts the caller at the first debug call.
  if [[ "$DEBUG" == "1" ]]; then
    printf '%s\n' "$*"
  fi
}
#######################################
# Report an event both on stdout and to syslog.
# Globals:   SYSLOG_TAG (read) - tag handed to logger
# Arguments: $1 - the event message
# Outputs:   the message followed by a newline on stdout
# Returns:   status of the logger invocation
#######################################
event_log() {
  local -r message="$1"

  printf '%s\n' "$message"
  # `--` keeps a message that starts with '-' from being parsed as an option.
  logger -t "$SYSLOG_TAG" -- "$message"
}
#######################################
# Create the state file with initial values if it does not exist yet.
# An existing regular file is left untouched.
# Globals:   STATE_FILE (read) - path of the state file
# Arguments: none
# Returns:   0 when the file already exists; otherwise the write status
#######################################
state_init() {
  if [[ ! -f "$STATE_FILE" ]]; then
    # One KEY=value assignment per line; the file is later read with `source`.
    printf '%s\n' \
      'FAIL_COUNT=0' \
      'OK_COUNT=0' \
      'MODE=normal' \
      'LAST_STATE=init' >"$STATE_FILE"
  fi
}
#######################################
# Load the persisted state into the current shell and sanitise it.
#
# The state file is sourced, then every value is validated so that an empty,
# truncated or hand-edited file cannot leave a variable unset (fatal under
# `set -u`) or non-numeric (fatal in the arithmetic done by the caller).
# Globals:   STATE_FILE (read)
#            FAIL_COUNT, OK_COUNT, MODE, LAST_STATE (written)
# Arguments: none
# Returns:   non-zero if the state file cannot be sourced, 0 otherwise
#######################################
state_load() {
  # Plain decimal without leading zeros (a leading zero would be read as
  # octal by $(( ))).
  local -r uint_re='^(0|[1-9][0-9]*)$'

  # shellcheck disable=SC1090
  source "$STATE_FILE" || return

  [[ "${FAIL_COUNT:-}" =~ $uint_re ]] || FAIL_COUNT=0
  [[ "${OK_COUNT:-}" =~ $uint_re ]] || OK_COUNT=0
  case "${MODE:-}" in
    normal | failover) ;;
    *) MODE=normal ;;
  esac
  LAST_STATE="${LAST_STATE:-init}"
}
#######################################
# Persist the current counters, mode and last reported state.
# The file is overwritten with one KEY=value line per variable.
# Globals:   STATE_FILE (read)
#            FAIL_COUNT, OK_COUNT, MODE, LAST_STATE (read)
# Arguments: none
# Returns:   status of the write
#######################################
state_save() {
  printf '%s\n' \
    "FAIL_COUNT=$FAIL_COUNT" \
    "OK_COUNT=$OK_COUNT" \
    "MODE=$MODE" \
    "LAST_STATE=$LAST_STATE" >"$STATE_FILE"
}
#######################################
# Record a state change, logging it only when the state actually differs
# from the last reported one.
# Globals:   LAST_STATE (read, written)
# Arguments: $1 - identifier of the new state
#            $2 - message to log when the state changes
# Outputs:   whatever event_log / debug emit
# Returns:   status of state_save on a change, of debug otherwise
#######################################
transition_to() {
  local target_state="$1"
  local message="$2"

  # Same state as last time: nothing to log or persist.
  if [[ "$LAST_STATE" == "$target_state" ]]; then
    debug "state unchanged: $target_state"
    return
  fi

  event_log "$message"
  LAST_STATE="$target_state"
  state_save
}
#######################################
# Tell whether NetworkManager reports the primary device as connected.
# Globals:   PRIMARY_DEV (read)
# Arguments: none
# Outputs:   a debug line with the reported state
# Returns:   0 if the state text starts with "100" and contains
#            "(connected)", 1 otherwise (including when nmcli fails)
#######################################
nm_device_connected() {
  local nm_state

  # Terse output looks like "GENERAL.STATE:<value>"; keep what follows the
  # first colon. A failing nmcli simply yields an empty state.
  nm_state="$(
    nmcli -t -f GENERAL.STATE device show "$PRIMARY_DEV" \
      | cut -d: -f2- \
      || true
  )"
  debug "nm state for $PRIMARY_DEV: ${nm_state:-<empty>}"

  case "$nm_state" in
    100*"(connected)"*) return 0 ;;
    *) return 1 ;;
  esac
}
#######################################
# Tell whether a default route exists on the primary device.
# Globals:   PRIMARY_DEV (read)
# Arguments: none
# Outputs:   a debug line with the routes found
# Returns:   0 if any reported route line starts with "default", 1 otherwise
#            (including when the ip command fails)
#######################################
has_default_route_on_primary() {
  local route_lines

  route_lines="$(ip route show default dev "$PRIMARY_DEV" || true)"
  debug "default routes on $PRIMARY_DEV: ${route_lines:-<none>}"

  # Prefixing a newline lets one pattern match "default" at the start of
  # any line, the first one included.
  [[ $'\n'"$route_lines" == *$'\n'default* ]]
}
#######################################
# Print the gateway address of the first default route on the primary
# device that has one.
#
# The address is taken from the token following "via" rather than from a
# fixed column, so a default route without a gateway (for example
# "default proto static scope link") yields no output instead of an
# unrelated word.
# Globals:   PRIMARY_DEV (read)
# Arguments: none
# Outputs:   the gateway address on stdout, or nothing when there is none
# Returns:   status of the ip | awk pipeline
#######################################
get_gateway() {
  ip route show default dev "$PRIMARY_DEV" \
    | awk '
        /^default/ {
          for (i = 2; i < NF; i++) {
            if ($i == "via") {
              print $(i + 1)
              exit
            }
          }
        }
      '
}
#######################################
# Ping one host through the primary device.
# Globals:   PRIMARY_DEV, PING_COUNT, PING_TIMEOUT (read)
# Arguments: $1 - host or address to ping
# Outputs:   a debug line; ping's own output is discarded
# Returns:   ping's exit status
#######################################
probe_host() {
  local target="$1"
  local -a ping_args=(
    -I "$PRIMARY_DEV"
    -c "$PING_COUNT"
    -W "$PING_TIMEOUT"
    "$target"
  )

  debug "probing via $PRIMARY_DEV -> $target"
  ping "${ping_args[@]}" >/dev/null 2>&1
}
#######################################
# Decide whether the primary link currently has working connectivity.
#
# The gateway must answer first; after that a single answering host from
# EXT_PROBES is sufficient.
# Globals:   PRIMARY_DEV, EXT_PROBES (read)
# Arguments: none
# Outputs:   debug lines describing each probe
# Returns:   0 if the gateway and at least one external host answered,
#            1 otherwise (no gateway, gateway silent, or all hosts silent)
#######################################
healthy_primary() {
  local gateway target

  gateway="$(get_gateway)"
  debug "gateway for $PRIMARY_DEV: ${gateway:-<none>}"
  if [[ -z "$gateway" ]]; then
    return 1
  fi

  if probe_host "$gateway"; then
    debug "gateway probe passed: $gateway"
  else
    debug "gateway probe failed: $gateway"
    return 1
  fi

  for target in "${EXT_PROBES[@]}"; do
    if ! probe_host "$target"; then
      debug "external probe failed: $target"
      continue
    fi
    debug "external probe passed: $target"
    return 0
  done

  return 1
}
#######################################
# Store the normal-mode route metrics on both connection profiles
# (IPv4 and IPv6 get the same value per profile).
# Globals:   PRIMARY_CONN, BACKUP_CONN,
#            PRIMARY_METRIC_NORMAL, BACKUP_METRIC_NORMAL (read)
# Arguments: none
# Outputs:   a debug line
# Returns:   status of the last nmcli call
#######################################
set_normal_metrics() {
  debug "setting normal metrics: $PRIMARY_CONN=$PRIMARY_METRIC_NORMAL $BACKUP_CONN=$BACKUP_METRIC_NORMAL"

  nmcli connection modify "$PRIMARY_CONN" \
    ipv4.route-metric "$PRIMARY_METRIC_NORMAL" \
    ipv6.route-metric "$PRIMARY_METRIC_NORMAL"

  nmcli connection modify "$BACKUP_CONN" \
    ipv4.route-metric "$BACKUP_METRIC_NORMAL" \
    ipv6.route-metric "$BACKUP_METRIC_NORMAL"
}
#######################################
# Store the failover-mode route metrics on both connection profiles
# (IPv4 and IPv6 get the same value per profile).
# Globals:   PRIMARY_CONN, BACKUP_CONN,
#            PRIMARY_METRIC_FAILOVER, BACKUP_METRIC_FAILOVER (read)
# Arguments: none
# Outputs:   a debug line
# Returns:   status of the last nmcli call
#######################################
set_failover_metrics() {
  debug "setting failover metrics: $PRIMARY_CONN=$PRIMARY_METRIC_FAILOVER $BACKUP_CONN=$BACKUP_METRIC_FAILOVER"

  nmcli connection modify "$PRIMARY_CONN" \
    ipv4.route-metric "$PRIMARY_METRIC_FAILOVER" \
    ipv6.route-metric "$PRIMARY_METRIC_FAILOVER"

  nmcli connection modify "$BACKUP_CONN" \
    ipv4.route-metric "$BACKUP_METRIC_FAILOVER" \
    ipv6.route-metric "$BACKUP_METRIC_FAILOVER"
}
#######################################
# Bring the backup connection up (best effort).
# Globals:   BACKUP_CONN (read)
# Arguments: none
# Outputs:   a debug line; nmcli output is discarded
# Returns:   0 even when nmcli fails
#######################################
activate_backup() {
  debug "activating backup connection: $BACKUP_CONN"

  # Failure is deliberately ignored so a missing backup link does not
  # abort the run.
  if ! nmcli connection up "$BACKUP_CONN" >/dev/null 2>&1; then
    return 0
  fi
}
#######################################
# Bring the primary connection up again (best effort).
# Globals:   PRIMARY_CONN (read)
# Arguments: none
# Outputs:   a debug line; nmcli output is discarded
# Returns:   0 even when nmcli fails
#######################################
reactivate_primary() {
  debug "reactivating primary connection: $PRIMARY_CONN"

  # Failure is deliberately ignored so an unavailable primary link does
  # not abort the run.
  if ! nmcli connection up "$PRIMARY_CONN" >/dev/null 2>&1; then
    return 0
  fi
}
#######################################
# Switch to failover mode: apply the failover metrics, bring both
# connections up again and reset the counters.
# Does nothing when already in failover mode.
# Globals:   MODE (read, written); FAIL_COUNT, OK_COUNT (written)
# Arguments: none
# Returns:   0
#######################################
enter_failover() {
  if [[ "$MODE" == "failover" ]]; then
    return 0
  fi

  set_failover_metrics
  # Backup first, then primary.
  activate_backup
  reactivate_primary

  MODE="failover"
  FAIL_COUNT=0
  OK_COUNT=0
}
#######################################
# Switch back to normal mode: apply the normal metrics, bring both
# connections up again and reset the counters.
# Does nothing when already in normal mode.
# Globals:   MODE (read, written); FAIL_COUNT, OK_COUNT (written)
# Arguments: none
# Returns:   0
#######################################
exit_failover() {
  if [[ "$MODE" == "normal" ]]; then
    return 0
  fi

  set_normal_metrics
  # Primary first, then backup.
  reactivate_primary
  activate_backup

  MODE="normal"
  FAIL_COUNT=0
  OK_COUNT=0
}
#######################################
# One evaluation pass of the failover state machine.
#
# Order of checks:
#   1. primary device not connected  -> reset counters, leave failover if
#      active, report, exit 0
#   2. no default route on primary   -> same handling, exit 0
#   3. primary healthy               -> count it; in failover mode leave
#      failover once RECOVER_THRESHOLD is reached; save, exit 0
#   4. primary unhealthy             -> count it; in normal mode enter
#      failover once FAIL_THRESHOLD is reached; save, return
# Globals:   MODE, LAST_STATE, FAIL_COUNT, OK_COUNT (read, written)
#            PRIMARY_DEV, PRIMARY_CONN, BACKUP_CONN,
#            FAIL_THRESHOLD, RECOVER_THRESHOLD (read)
# Arguments: none used
# Returns:   exits 0 in cases 1-3; returns state_save's status in case 4
#######################################
main() {
  local note

  state_init
  state_load
  debug "starting run: MODE=$MODE LAST_STATE=$LAST_STATE FAIL_COUNT=$FAIL_COUNT OK_COUNT=$OK_COUNT"

  if ! nm_device_connected; then
    # Link is down: probing is pointless, so hand routing back.
    OK_COUNT=0
    FAIL_COUNT=0
    if [[ "$MODE" == "failover" ]]; then
      exit_failover
    fi
    note="state transition: primary disconnected on $PRIMARY_DEV; leaving route choice to NetworkManager"
    transition_to "primary_disconnected" "$note"
    exit 0
  elif ! has_default_route_on_primary; then
    OK_COUNT=0
    FAIL_COUNT=0
    if [[ "$MODE" == "failover" ]]; then
      exit_failover
    fi
    note="state transition: primary connected but has no default route on $PRIMARY_DEV; leaving route choice to NetworkManager"
    transition_to "primary_no_default_route" "$note"
    exit 0
  elif healthy_primary; then
    OK_COUNT=$((OK_COUNT + 1))
    FAIL_COUNT=0
    debug "primary healthy: ok_count=$OK_COUNT mode=$MODE"

    if [[ "$MODE" == "failover" ]]; then
      note="state transition: primary connectivity recovered on $PRIMARY_DEV; waiting for recovery threshold ($OK_COUNT/$RECOVER_THRESHOLD)"
      transition_to "primary_recovering" "$note"
      # Persist the running count even when the state name did not change.
      state_save
      if ((OK_COUNT >= RECOVER_THRESHOLD)); then
        exit_failover
        note="state transition: primary healthy on $PRIMARY_DEV; exiting failover and preferring $PRIMARY_CONN"
        transition_to "primary_healthy" "$note"
      fi
    else
      note="state transition: primary healthy on $PRIMARY_DEV"
      transition_to "primary_healthy" "$note"
    fi

    state_save
    exit 0
  fi

  # Reaching this point means the probes failed on a connected, routed link.
  FAIL_COUNT=$((FAIL_COUNT + 1))
  OK_COUNT=0
  debug "primary unhealthy: fail_count=$FAIL_COUNT mode=$MODE"

  if [[ "$MODE" == "normal" ]]; then
    note="state transition: primary connectivity degraded on $PRIMARY_DEV; waiting for failover threshold ($FAIL_COUNT/$FAIL_THRESHOLD)"
    transition_to "primary_degrading" "$note"
    # Persist the running count even when the state name did not change.
    state_save
    if ((FAIL_COUNT >= FAIL_THRESHOLD)); then
      enter_failover
      note="state transition: primary unhealthy on $PRIMARY_DEV; entering failover and preferring $BACKUP_CONN"
      transition_to "failover_active" "$note"
    fi
  else
    note="state transition: failover remains active; primary still unhealthy on $PRIMARY_DEV"
    transition_to "failover_active" "$note"
  fi

  state_save
}
# Script entry point: run one evaluation pass, forwarding any command-line
# arguments to main.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment