-
-
Save mdaguete/068488af17a23a10815674d72ec34548 to your computer and use it in GitHub Desktop.
Rolling restart of all nodes in an AKS cluster
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Rolling restart of the nodes in an AKS cluster.
#
# Configuration comes from (highest priority first): command-line options,
# the environment variables RESOURCE_GROUP / CLUSTER_NAME / REGION, and the
# *Default values below.

# Abort on the first unhandled command failure.
set -e

# Site defaults; edit these so the common case needs no options at all.
resourceGroupDefault='<set your default here, to avoid having to specify in the common case>'
resourceGroup=${RESOURCE_GROUP:-$resourceGroupDefault}

clusterNameDefault='<set your default here>'
clusterName=${CLUSTER_NAME:-$clusterNameDefault}

regionDefault='<set your default here>'
region=${REGION:-$regionDefault}

# Behaviour flags, toggled by --force and --dry-run.
force=false
dryrun=false

# Single node selected with --node. Initialised explicitly so that a stray
# environment variable called "node" cannot silently limit the restart.
node=''
# Space-separated list of the nodes that will actually be restarted.
nodes=''
#######################################
# Print the command-line help text.
# Globals:   resourceGroupDefault, clusterNameDefault, regionDefault (read)
# Arguments: none
# Outputs:   the usage text on stdout, one option per paragraph
#######################################
function print_usage() {
  # A single printf with one argument per output line; an empty argument
  # yields a blank separator line.
  printf '%s\n' \
    "Usage: $0 [<options>]" \
    "" \
    "-n|--node <node> The name of a node to restart." \
    " By default, a rolling restart of all nodes" \
    " is performed." \
    "" \
    "--resource-group <group-name> The resource group of the cluster." \
    " Can also be set by RESOURCE_GROUP" \
    " Default: $resourceGroupDefault" \
    "" \
    "--cluster-name <cluster-name> The name of the cluster." \
    " Can also be set by CLUSTER_NAME" \
    " Default: $clusterNameDefault" \
    "" \
    "--region <azure-region> The Azure region in which the cluster is." \
    " Can also be set by REGION" \
    " Default: $regionDefault" \
    "" \
    "-f|--force Restart node(s) without first draining." \
    " Useful if draining a node fails." \
    "" \
    "-d|--dry-run Just print what to do; don't actually do it" \
    "" \
    "-h|--help Print usage and exit."
}
#######################################
# Parse the command-line options into the script's global settings.
# Globals:   node, resourceGroup, clusterName, region, force, dryrun (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text on stdout for --help and on bad input; error
#            diagnostics on stderr
# Exits:     0 after --help; 1 on an unknown option or a missing value
#######################################
function parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -n|--node|--resource-group|--cluster-name|--region)
        # These options take a value. Without this check a trailing option
        # made the second "shift" fail and, under "set -e", the script
        # died without any message.
        if [[ $# -lt 2 ]]; then
          echo "Error: option $1 requires a value" >&2
          print_usage
          exit 1
        fi
        case "$1" in
          -n|--node)        node="$2" ;;
          --resource-group) resourceGroup="$2" ;;
          --cluster-name)   clusterName="$2" ;;
          --region)         region="$2" ;;
        esac
        shift 2
        ;;
      -f|--force)
        force=true
        shift
        ;;
      # "-d" is advertised by the usage text, so accept it alongside the
      # long form.
      -d|--dry-run)
        dryrun=true
        shift
        ;;
      -h|--help)
        print_usage
        exit 0
        ;;
      *)
        echo "Error: unknown option: $1" >&2
        print_usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Resource group that holds the cluster's node VMs. The pattern
# MC_<resource-group>_<cluster-name>_<region> is the name AKS generates by
# default; set NODE_RESOURCE_GROUP to override it for clusters that were
# created with a custom node resource group.
group=${NODE_RESOURCE_GROUP:-"MC_${resourceGroup}_${clusterName}_${region}"}
#######################################
# Poll a node until the condition carrying the given reason is the "Ready"
# condition, e.g. reason KubeletReady (node is up) or KubeletNotReady
# (node has gone down).
# Arguments: $1 - node name
#            $2 - condition reason to wait for
# Outputs:   "<reason> after <n> seconds" on stdout once reached;
#            an error message on stderr on timeout
# Returns:   0 once the reason is observed
# Exits:     1 if the reason is not observed within 30 polls, 2 seconds apart
#######################################
function wait_for_status() {
  # Everything is local so the caller's variables (notably its "node" loop
  # variable) are never overwritten.
  local node=$1
  local reason=$2
  local -r max_attempts=30
  local -r interval=2
  local attempt=0
  local status

  while (( attempt < max_attempts )); do
    # The filter selects the condition whose reason matches and prints its
    # type. A failed kubectl call is treated as "not there yet" so that one
    # transient error does not abort the whole restart under "set -e".
    status=$(kubectl get node "$node" -o "jsonpath={.status.conditions[?(.reason==\"$reason\")].type}") || status=''
    if [[ "$status" == "Ready" ]]; then
      echo "$reason after $((attempt * interval)) seconds"
      return 0
    fi
    sleep "$interval"
    attempt=$((attempt + 1))
  done

  echo "Error: Did not reach $reason state within 1 minute" >&2
  exit 1
}
#######################################
# Run a command, or only print it when --dry-run is active.
# Globals:   dryrun (read)
# Arguments: the command and its arguments
# Outputs:   in dry-run mode, the command line on stdout
#######################################
function run_or_print() {
  if [[ "$dryrun" == true ]]; then
    echo "$*"
  else
    "$@"
  fi
}

# Decide which nodes to restart: the one given with --node, or every node
# in the cluster. The jsonpath expression is quoted so the shell never
# treats "[*]" as a glob pattern.
if [[ -z "${node:-}" ]]; then
  nodes=$(kubectl get nodes -o 'jsonpath={.items[*].metadata.name}')
else
  nodes="$node"
fi

# Split the space-separated names into an array so that iteration does not
# rely on unquoted word splitting (which would also glob-expand the names).
node_list=()
read -r -a node_list <<<"$nodes"
if [[ ${#node_list[@]} -eq 0 ]]; then
  echo "Warning: no nodes found; nothing to restart" >&2
fi

# Restart one node at a time: take it out of scheduling, restart its VM,
# wait for the kubelet to go down and come back, then re-enable scheduling.
for node in "${node_list[@]}"; do
  if [[ "$force" == true ]]; then
    echo "WARNING: --force specified, restarting node $node without draining first"
    run_or_print kubectl cordon "$node"
  else
    echo "Draining $node..."
    # NOTE(review): newer kubectl releases name this flag
    # --delete-emptydir-data; confirm against the kubectl version in use.
    run_or_print kubectl drain "$node" --ignore-daemonsets --delete-local-data
  fi

  echo "Initiating VM restart for $node..."
  run_or_print az vm restart --resource-group "$group" --name "$node"

  # Nothing was restarted in dry-run mode, so there is nothing to wait for.
  if [[ "$dryrun" != true ]]; then
    echo "Waiting for $node to start back up..."
    wait_for_status "$node" KubeletNotReady
    wait_for_status "$node" KubeletReady
  fi

  echo "Re-enabling $node for scheduling"
  run_or_print kubectl uncordon "$node"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment