Created
March 21, 2026 19:04
-
-
Save tiborvass/86c92d3b9081d5af828c47fbfbbea70f to your computer and use it in GitHub Desktop.
Internet connectivity-checking network failover script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Internet connectivity-checking network failover script.
#
# Probes the primary interface's gateway and a set of external hosts and,
# after a number of consecutive failures, swaps the NetworkManager route
# metrics so the backup connection is preferred. Counters and the current
# mode are kept in a small state file between runs.
set -euo pipefail

# --- config ---
# Each scalar setting keeps its original default but may be overridden from
# the environment (the same pattern DEBUG already used), so the script does
# not have to be edited to set e.g. BACKUP_CONN.

# Interface whose connectivity is checked, and its NetworkManager profile.
PRIMARY_DEV="${PRIMARY_DEV:-eno1}"
PRIMARY_CONN="${PRIMARY_CONN:-Wired connection 1}"
# NetworkManager profile to prefer while the primary is unhealthy.
BACKUP_CONN="${BACKUP_CONN:-YOUR_WIFI_CONNECTION_NAME}"

# Route metrics written to the two profiles in each mode.
PRIMARY_METRIC_NORMAL="${PRIMARY_METRIC_NORMAL:-100}"
BACKUP_METRIC_NORMAL="${BACKUP_METRIC_NORMAL:-600}"
PRIMARY_METRIC_FAILOVER="${PRIMARY_METRIC_FAILOVER:-900}"
BACKUP_METRIC_FAILOVER="${BACKUP_METRIC_FAILOVER:-50}"

# Consecutive unhealthy runs before failing over / healthy runs before
# switching back.
FAIL_THRESHOLD="${FAIL_THRESHOLD:-3}"
RECOVER_THRESHOLD="${RECOVER_THRESHOLD:-2}"

# Passed to ping as -c and -W respectively.
PING_COUNT="${PING_COUNT:-1}"
PING_TIMEOUT="${PING_TIMEOUT:-1}"
# External hosts probed through the primary interface; one reply is enough.
EXT_PROBES=("1.1.1.1" "8.8.8.8")

# Location of the persisted counters/mode.
STATE_DIR="${STATE_DIR:-/run/failover-check}"
STATE_FILE="$STATE_DIR/state"

# Set DEBUG=1 to print diagnostic lines.
DEBUG="${DEBUG:-0}"
# Tag used for syslog entries written via logger.
SYSLOG_TAG="${SYSLOG_TAG:-failover-check}"
# --------------
# Ensure the directory holding the state file exists before anything reads
# or writes it.
if [[ ! -d "$STATE_DIR" ]]; then
  mkdir -p "$STATE_DIR"
fi
#######################################
# Print a diagnostic line when debugging is enabled.
# Globals:   DEBUG (read) - output is produced only when it equals "1"
# Arguments: message words; emitted as one line ("$*")
# Outputs:   the message on stdout when DEBUG is "1", nothing otherwise
# Returns:   0 when debugging is disabled; printf's status otherwise
#######################################
debug() {
  # Deliberately an `if`, not `[[ ... ]] && printf`: the short-circuit form
  # makes the function return 1 whenever DEBUG is not "1", and under
  # `set -e` an unguarded `debug ...` call would then abort the script.
  if [[ "$DEBUG" == "1" ]]; then
    printf '%s\n' "$*"
  fi
}
#######################################
# Report an event on stdout and in syslog.
# Globals:   SYSLOG_TAG (read) - tag handed to logger
# Arguments: $1 - message text
# Outputs:   the message on stdout; the same text is passed to logger
# Returns:   status of the logger invocation
#######################################
event_log() {
  local message=$1

  printf '%s\n' "$message"
  # `--` keeps a message that starts with '-' from being read as an option.
  logger -t "$SYSLOG_TAG" -- "$message"
}
#######################################
# Create the state file with initial values if it does not exist yet.
# Globals:   STATE_FILE (read) - path of the state file
# Arguments: none
# Outputs:   writes four KEY=value lines to STATE_FILE when it is missing
# Returns:   0 if the file already exists or was written successfully
#######################################
state_init() {
  if [[ ! -f "$STATE_FILE" ]]; then
    printf '%s\n' \
      'FAIL_COUNT=0' \
      'OK_COUNT=0' \
      'MODE=normal' \
      'LAST_STATE=init' >"$STATE_FILE"
  fi
}
#######################################
# Load the persisted counters and mode from the state file.
#
# The file is parsed as KEY=value lines instead of being sourced, so its
# content is never executed, and every variable is given a default first so
# a missing, truncated or hand-edited file cannot leave a variable unset
# (which would abort the script under `set -u`).
#
# Globals:   STATE_FILE (read)
#            FAIL_COUNT, OK_COUNT, MODE, LAST_STATE (written)
# Arguments: none
# Returns:   0 always; unreadable files and invalid entries fall back to the
#            defaults FAIL_COUNT=0 OK_COUNT=0 MODE=normal LAST_STATE=init
#######################################
state_load() {
  local key value

  FAIL_COUNT=0
  OK_COUNT=0
  MODE=normal
  LAST_STATE=init

  if [[ ! -r "$STATE_FILE" ]]; then
    return 0
  fi

  # `|| [[ -n "$key" ]]` also processes a final line without a newline.
  while IFS='=' read -r key value || [[ -n "$key" ]]; do
    case "$key" in
      FAIL_COUNT)
        if [[ "$value" =~ ^[0-9]+$ ]]; then
          # 10# forces base 10 so a leading zero is not read as octal.
          FAIL_COUNT=$((10#$value))
        fi
        ;;
      OK_COUNT)
        if [[ "$value" =~ ^[0-9]+$ ]]; then
          OK_COUNT=$((10#$value))
        fi
        ;;
      MODE)
        if [[ "$value" == "normal" || "$value" == "failover" ]]; then
          MODE=$value
        fi
        ;;
      LAST_STATE)
        if [[ "$value" =~ ^[A-Za-z0-9_]+$ ]]; then
          LAST_STATE=$value
        fi
        ;;
      *)
        # Unknown or partial keys are ignored.
        ;;
    esac
  done <"$STATE_FILE"

  return 0
}
#######################################
# Persist the current counters and mode to the state file.
# Globals:   STATE_FILE, FAIL_COUNT, OK_COUNT, MODE, LAST_STATE (read)
# Arguments: none
# Outputs:   overwrites STATE_FILE with four KEY=value lines
# Returns:   status of the write
#######################################
state_save() {
  printf '%s\n' \
    "FAIL_COUNT=$FAIL_COUNT" \
    "OK_COUNT=$OK_COUNT" \
    "MODE=$MODE" \
    "LAST_STATE=$LAST_STATE" >"$STATE_FILE"
}
#######################################
# Record a state change, logging it only when the state actually differs.
# Globals:   LAST_STATE (read, written on change)
# Arguments: $1 - name of the state being entered
#            $2 - message to log when the state changes
# Outputs:   via event_log on a change, via debug otherwise
# Returns:   status of state_save on a change, of debug otherwise
#######################################
transition_to() {
  local target=$1
  local message=$2

  if [[ "$LAST_STATE" == "$target" ]]; then
    debug "state unchanged: $target"
    return
  fi

  event_log "$message"
  LAST_STATE="$target"
  state_save
}
#######################################
# Check whether NetworkManager reports the primary device as connected.
# Globals:   PRIMARY_DEV (read)
# Arguments: none
# Outputs:   a debug line with the reported state
# Returns:   0 if the state text starts with "100" and contains
#            "(connected)", 1 otherwise (including when nmcli fails)
#######################################
nm_device_connected() {
  local nm_state

  # `|| true`: a failing nmcli (with pipefail) just yields an empty state.
  nm_state="$(
    nmcli -t -f GENERAL.STATE device show "$PRIMARY_DEV" | cut -d: -f2- || true
  )"
  debug "nm state for $PRIMARY_DEV: ${nm_state:-<empty>}"

  case "$nm_state" in
    100*"(connected)"*) return 0 ;;
    *) return 1 ;;
  esac
}
#######################################
# Check whether a default route exists on the primary device.
# Globals:   PRIMARY_DEV (read)
# Arguments: none
# Outputs:   a debug line listing the routes that were found
# Returns:   0 if any reported line starts with "default", 1 otherwise
#######################################
has_default_route_on_primary() {
  local default_routes route_line

  # `|| true`: a failing ip command is treated as "no routes".
  default_routes="$(ip route show default dev "$PRIMARY_DEV" || true)"
  debug "default routes on $PRIMARY_DEV: ${default_routes:-<none>}"

  while IFS= read -r route_line; do
    if [[ "$route_line" == default* ]]; then
      return 0
    fi
  done <<<"$default_routes"
  return 1
}
#######################################
# Print the gateway address of the default route on the primary device.
#
# The address is taken from the token that follows `via`, not from a fixed
# field position: a default route without a next hop has some other word in
# field 3, which would otherwise be returned (and pinged) as a "gateway".
#
# Globals:   PRIMARY_DEV (read)
# Arguments: none
# Outputs:   the gateway of the first default route that has one, on
#            stdout; nothing if no such route exists
# Returns:   status of the ip | awk pipeline
#######################################
get_gateway() {
  # awk reads its whole input instead of exiting early, so ip is never cut
  # off mid-write (which pipefail would report as a pipeline failure).
  ip route show default dev "$PRIMARY_DEV" \
    | awk '
        !found && $1 == "default" {
          for (i = 2; i < NF; i++) {
            if ($i == "via") {
              print $(i + 1)
              found = 1
              break
            }
          }
        }
      '
}
#######################################
# Ping a host through the primary device.
# Globals:   PRIMARY_DEV, PING_COUNT, PING_TIMEOUT (read)
# Arguments: $1 - host or address to ping
# Outputs:   a debug line; ping's own output is discarded
# Returns:   ping's exit status
#######################################
probe_host() {
  local target=$1
  local -a ping_args

  debug "probing via $PRIMARY_DEV -> $target"
  ping_args=(-I "$PRIMARY_DEV" -c "$PING_COUNT" -W "$PING_TIMEOUT")
  ping "${ping_args[@]}" "$target" &>/dev/null
}
#######################################
# Decide whether the primary device currently has working connectivity.
#
# The gateway must answer first; after that a reply from any one of the
# external probe hosts is sufficient.
#
# Globals:   PRIMARY_DEV, EXT_PROBES (read)
# Arguments: none
# Outputs:   debug lines describing each probe
# Returns:   0 if the gateway and at least one external host respond,
#            1 if there is no gateway, the gateway is silent, or every
#            external probe fails
#######################################
healthy_primary() {
  local gateway target

  gateway="$(get_gateway)"
  debug "gateway for $PRIMARY_DEV: ${gateway:-<none>}"
  if [[ -z "$gateway" ]]; then
    return 1
  fi

  if probe_host "$gateway"; then
    debug "gateway probe passed: $gateway"
  else
    debug "gateway probe failed: $gateway"
    return 1
  fi

  for target in "${EXT_PROBES[@]}"; do
    if ! probe_host "$target"; then
      debug "external probe failed: $target"
      continue
    fi
    debug "external probe passed: $target"
    return 0
  done

  return 1
}
#######################################
# Write the normal-mode route metrics to both connection profiles.
# Globals:   PRIMARY_CONN, BACKUP_CONN, PRIMARY_METRIC_NORMAL,
#            BACKUP_METRIC_NORMAL (read)
# Arguments: none
# Outputs:   a debug line
# Returns:   status of the last nmcli call
#######################################
set_normal_metrics() {
  local -a primary_metrics backup_metrics

  debug "setting normal metrics: $PRIMARY_CONN=$PRIMARY_METRIC_NORMAL $BACKUP_CONN=$BACKUP_METRIC_NORMAL"

  # The same metric is applied to the IPv4 and IPv6 settings of a profile.
  primary_metrics=(
    ipv4.route-metric "$PRIMARY_METRIC_NORMAL"
    ipv6.route-metric "$PRIMARY_METRIC_NORMAL"
  )
  backup_metrics=(
    ipv4.route-metric "$BACKUP_METRIC_NORMAL"
    ipv6.route-metric "$BACKUP_METRIC_NORMAL"
  )

  nmcli connection modify "$PRIMARY_CONN" "${primary_metrics[@]}"
  nmcli connection modify "$BACKUP_CONN" "${backup_metrics[@]}"
}
#######################################
# Write the failover-mode route metrics to both connection profiles.
# Globals:   PRIMARY_CONN, BACKUP_CONN, PRIMARY_METRIC_FAILOVER,
#            BACKUP_METRIC_FAILOVER (read)
# Arguments: none
# Outputs:   a debug line
# Returns:   status of the last nmcli call
#######################################
set_failover_metrics() {
  local -a primary_metrics backup_metrics

  debug "setting failover metrics: $PRIMARY_CONN=$PRIMARY_METRIC_FAILOVER $BACKUP_CONN=$BACKUP_METRIC_FAILOVER"

  # The same metric is applied to the IPv4 and IPv6 settings of a profile.
  primary_metrics=(
    ipv4.route-metric "$PRIMARY_METRIC_FAILOVER"
    ipv6.route-metric "$PRIMARY_METRIC_FAILOVER"
  )
  backup_metrics=(
    ipv4.route-metric "$BACKUP_METRIC_FAILOVER"
    ipv6.route-metric "$BACKUP_METRIC_FAILOVER"
  )

  nmcli connection modify "$PRIMARY_CONN" "${primary_metrics[@]}"
  nmcli connection modify "$BACKUP_CONN" "${backup_metrics[@]}"
}
#######################################
# Bring the backup connection up (best effort).
# Globals:   BACKUP_CONN (read)
# Arguments: none
# Outputs:   a debug line; nmcli output is discarded
# Returns:   0 always - an nmcli failure is deliberately ignored
#######################################
activate_backup() {
  debug "activating backup connection: $BACKUP_CONN"
  if ! nmcli connection up "$BACKUP_CONN" &>/dev/null; then
    : # best effort: activation failure must not stop the run
  fi
  return 0
}
#######################################
# Bring the primary connection up again (best effort).
# Globals:   PRIMARY_CONN (read)
# Arguments: none
# Outputs:   a debug line; nmcli output is discarded
# Returns:   0 always - an nmcli failure is deliberately ignored
#######################################
reactivate_primary() {
  debug "reactivating primary connection: $PRIMARY_CONN"
  if ! nmcli connection up "$PRIMARY_CONN" &>/dev/null; then
    : # best effort: activation failure must not stop the run
  fi
  return 0
}
#######################################
# Switch to failover mode unless it is already active.
#
# Applies the failover metrics, then brings up the backup and the primary
# connection (in that order), and resets both counters.
#
# Globals:   MODE (read, written); FAIL_COUNT, OK_COUNT (written)
# Arguments: none
# Returns:   0
#######################################
enter_failover() {
  if [[ "$MODE" != "failover" ]]; then
    set_failover_metrics
    activate_backup
    reactivate_primary
    MODE="failover"
    FAIL_COUNT=0
    OK_COUNT=0
  fi
  return 0
}
#######################################
# Return to normal mode unless it is already active.
#
# Applies the normal metrics, then brings up the primary and the backup
# connection (in that order), and resets both counters.
#
# Globals:   MODE (read, written); FAIL_COUNT, OK_COUNT (written)
# Arguments: none
# Returns:   0
#######################################
exit_failover() {
  if [[ "$MODE" != "normal" ]]; then
    set_normal_metrics
    reactivate_primary
    activate_backup
    MODE="normal"
    FAIL_COUNT=0
    OK_COUNT=0
  fi
  return 0
}
#######################################
# Run one connectivity check and update the failover state machine.
#
# Order of decisions:
#   1. Primary device not connected, or connected without a default route:
#      reset the counters, leave failover if it was active, record the
#      state and exit 0.
#   2. Primary unhealthy: bump FAIL_COUNT; in normal mode enter failover
#      once FAIL_THRESHOLD is reached.
#   3. Primary healthy: bump OK_COUNT; in failover mode return to normal
#      once RECOVER_THRESHOLD is reached; exit 0.
#
# Globals:   PRIMARY_DEV, PRIMARY_CONN, BACKUP_CONN, FAIL_THRESHOLD,
#            RECOVER_THRESHOLD (read); MODE, LAST_STATE (read, possibly
#            changed through helpers); FAIL_COUNT, OK_COUNT (read, written)
# Arguments: none used
# Returns:   exits 0 on the not-connected, no-route and healthy paths;
#            returns state_save's status on the unhealthy path
#######################################
main() {
  local parked_state=""
  local parked_msg=""

  state_init
  state_load
  debug "starting run: MODE=$MODE LAST_STATE=$LAST_STATE FAIL_COUNT=$FAIL_COUNT OK_COUNT=$OK_COUNT"

  # Situations in which the primary cannot be probed at all; the route
  # check is only made when the device is connected.
  if ! nm_device_connected; then
    parked_state="primary_disconnected"
    parked_msg="state transition: primary disconnected on $PRIMARY_DEV; leaving route choice to NetworkManager"
  elif ! has_default_route_on_primary; then
    parked_state="primary_no_default_route"
    parked_msg="state transition: primary connected but has no default route on $PRIMARY_DEV; leaving route choice to NetworkManager"
  fi

  if [[ -n "$parked_state" ]]; then
    FAIL_COUNT=0
    OK_COUNT=0
    if [[ "$MODE" == "failover" ]]; then
      exit_failover
    fi
    transition_to "$parked_state" "$parked_msg"
    exit 0
  fi

  if ! healthy_primary; then
    FAIL_COUNT=$((FAIL_COUNT + 1))
    OK_COUNT=0
    debug "primary unhealthy: fail_count=$FAIL_COUNT mode=$MODE"

    case "$MODE" in
      normal)
        transition_to \
          "primary_degrading" \
          "state transition: primary connectivity degraded on $PRIMARY_DEV; waiting for failover threshold ($FAIL_COUNT/$FAIL_THRESHOLD)"
        # Persist the new count even when the state name did not change.
        state_save
        if ((FAIL_COUNT >= FAIL_THRESHOLD)); then
          enter_failover
          transition_to \
            "failover_active" \
            "state transition: primary unhealthy on $PRIMARY_DEV; entering failover and preferring $BACKUP_CONN"
        fi
        ;;
      *)
        transition_to \
          "failover_active" \
          "state transition: failover remains active; primary still unhealthy on $PRIMARY_DEV"
        ;;
    esac

    state_save
    return
  fi

  OK_COUNT=$((OK_COUNT + 1))
  FAIL_COUNT=0
  debug "primary healthy: ok_count=$OK_COUNT mode=$MODE"

  case "$MODE" in
    failover)
      transition_to \
        "primary_recovering" \
        "state transition: primary connectivity recovered on $PRIMARY_DEV; waiting for recovery threshold ($OK_COUNT/$RECOVER_THRESHOLD)"
      # Persist the new count even when the state name did not change.
      state_save
      if ((OK_COUNT >= RECOVER_THRESHOLD)); then
        exit_failover
        transition_to \
          "primary_healthy" \
          "state transition: primary healthy on $PRIMARY_DEV; exiting failover and preferring $PRIMARY_CONN"
      fi
      ;;
    *)
      transition_to \
        "primary_healthy" \
        "state transition: primary healthy on $PRIMARY_DEV"
      ;;
  esac

  state_save
  exit 0
}
# Run main only when the file is executed, not when it is sourced, so the
# functions above can be loaded (e.g. by a test harness) without triggering
# a connectivity check. The `:-$0` fallback covers an empty BASH_SOURCE
# (script read from stdin) under `set -u`.
if [[ "${BASH_SOURCE[0]:-$0}" == "$0" ]]; then
  main "$@"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment