Skip to content

Instantly share code, notes, and snippets.

@srbhklkrn
Created January 12, 2021 15:37
Show Gist options
  • Save srbhklkrn/9573d2dc73880ddfc6439b5c69e9f944 to your computer and use it in GitHub Desktop.
Save srbhklkrn/9573d2dc73880ddfc6439b5c69e9f944 to your computer and use it in GitHub Desktop.
# Ansible inventory for the Elastic cluster.
# Each host line sets the SSH target (ansible_host), the login user
# (ansible_user) and a custom `ip` variable.

# Elastic Master Nodes
[es_master]
es_master01 ansible_host=master01 ansible_user=root ip=10.64.20.204
es_master02 ansible_host=master02 ansible_user=root ip=10.64.20.205
es_master03 ansible_host=master03 ansible_user=root ip=10.64.20.206

# Elastic Data Nodes
[es_data]
es_data01 ansible_host=data01 ansible_user=root ip=10.64.20.209
es_data02 ansible_host=data02 ansible_user=root ip=10.64.20.210
es_data03 ansible_host=data03 ansible_user=root ip=10.64.20.211
es_data04 ansible_host=data04 ansible_user=root ip=10.64.20.212

# Parent group containing every master and data node
[es_cluster:children]
es_master
es_data

# Elastic Coordinating and Kibana Node
[kibana]
kibana01 ansible_host=kib01 ansible_user=root ip=10.64.20.200
# Ansible playbook that performs a rolling restart of the EFK (Elasticsearch, Fluentd, Kibana) stack.
# Change the username and password in the respective tasks.
# Set the number of nodes in your Elasticsearch cluster wherever `result.json.number_of_nodes == 7` appears.
# To run it on a schedule, add a cron job using `crontab -e`.
---
# Play 1: rolling restart of the Elasticsearch data nodes, one host at a time
# (serial: 1). For each host the play:
#   1. waits until the cluster is green with the expected node count,
#   2. restricts shard allocation to primaries,
#   3. stops the services, reboots the machine and starts the services again,
#   4. waits for the node to rejoin and re-enables shard allocation, so the
#      cluster can get back to green before the next host is processed.
# NOTE: `validate_certs: false` disables TLS certificate verification for the
# Elasticsearch API calls.
- name: Restart Elastic Data Nodes
  hosts: es_data
  serial: 1
  remote_user: root
  tags: "first"
  tasks:
    - name: Start rolling restart for {{ansible_host}}
      debug:
        msg: "Rolling restart of node {{ansible_hostname}} initiated"
      changed_when: true

    - name: Check for cluster status until its Green
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/health?pretty"
        follow_redirects: none
        method: GET
        user: "<username>"
        password: "<password>"
        force_basic_auth: true
        validate_certs: false
      register: result
      # `result.json` is absent when the request itself fails; the guard keeps
      # the task retrying instead of erroring on an undefined attribute.
      until: >-
        result.json is defined and
        result.json.status == "green" and
        result.json.number_of_nodes == 7
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds

    # Sets allocation to "primaries" so replicas are not rebalanced while the
    # node is down.
    - name: Disable shard allocation.
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/settings?pretty"
        follow_redirects: none
        method: PUT
        user: "<username>"
        password: "<password>"
        headers:
          Content-Type: application/json
        body: >
          { "persistent": { "cluster.routing.allocation.enable": "primaries" } }
        body_format: json
        force_basic_auth: true
        validate_certs: false
      register: result
      until: result.json is defined and result.json.acknowledged == true
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds
      run_once: true

    - name: Stop service elasticsearch
      service:
        name: elasticsearch
        state: stopped

    - name: Stop service metricbeat
      service:
        name: metricbeat
        state: stopped

    - name: reboot system
      reboot:
        msg: "Routine reboot initiated by Ansible"
        reboot_timeout: 3600

    - name: Start service elasticsearch, if not started
      service:
        name: elasticsearch
        state: started

    - name: Start service metricbeat, if not started
      service:
        name: metricbeat
        state: started

    # The API is unreachable while Elasticsearch is still starting, hence the
    # `result.json is defined` guard.
    - name: Wait for the restarted node to rejoin the cluster
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/health?pretty"
        follow_redirects: none
        method: GET
        user: "<username>"
        password: "<password>"
        force_basic_auth: true
        validate_certs: false
      register: result
      until: >-
        result.json is defined and
        result.json.number_of_nodes == 7
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds

    # Without this, replicas stay unassigned and the green check at the start
    # of the next host's run cannot succeed.
    - name: Re-enable shard allocation
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/settings?pretty"
        follow_redirects: none
        method: PUT
        user: "<username>"
        password: "<password>"
        headers:
          Content-Type: application/json
        body: >
          { "persistent": { "cluster.routing.allocation.enable": null } }
        body_format: json
        force_basic_auth: true
        validate_certs: false
      register: result
      until: result.json is defined and result.json.acknowledged == true
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds
# Play 2: rolling restart of the Elasticsearch master nodes, one host at a
# time (serial: 1). For each host the play waits for a green cluster with the
# expected node count, confirms that a master is currently elected, stops
# td-agent, elasticsearch and metricbeat, reboots the machine and starts the
# services again.
# NOTE: `validate_certs: false` disables TLS certificate verification for the
# Elasticsearch API calls.
- name: Restart Elastic Master Nodes
  hosts: es_master
  serial: 1
  remote_user: root
  tags: "second"
  tasks:
    - name: Start rolling restart for {{ansible_host}}
      debug:
        msg: "Rolling restart of node {{ansible_hostname}} initiated"
      changed_when: true

    - name: Check for cluster status until its Green
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/health?pretty"
        follow_redirects: none
        method: GET
        user: "<username>"
        password: "<password>"
        force_basic_auth: true
        validate_certs: false
      register: result
      # `result.json` is absent when the request itself fails; the guard keeps
      # the task retrying instead of erroring on an undefined attribute.
      until: >-
        result.json is defined and
        result.json.status == "green" and
        result.json.number_of_nodes == 7
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds

    - name: Check if there is elected master for the cluster
      uri:
        url: "https://{{ansible_host}}:9200/_cat/master"
        follow_redirects: none
        method: GET
        user: "<username>"
        password: "<password>"
        return_content: true
        force_basic_auth: true
        validate_certs: false
      register: result
      # The regex matches an IPv4 address, i.e. the address of the elected
      # master in the response. The `search` test yields a real boolean;
      # comparing `regex_search(...)` with "" is also true when nothing
      # matched, because a non-match is None rather than an empty string.
      until: >-
        result.content is defined and
        result.content is search('((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)')
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds
      run_once: true

    - name: Stop service td-agent
      service:
        name: td-agent
        state: stopped

    - name: Stop service elasticsearch
      service:
        name: elasticsearch
        state: stopped

    - name: Stop service metricbeat
      service:
        name: metricbeat
        state: stopped

    - name: reboot system
      reboot:
        msg: "Routine reboot initiated by Ansible"
        reboot_timeout: 3600

    - name: Start service elasticsearch, if not started
      service:
        name: elasticsearch
        state: started

    - name: Start service td-agent, if not started
      service:
        name: td-agent
        state: started

    - name: Start service metricbeat, if not started
      service:
        name: metricbeat
        state: started
# Play 3: restart the hosts in the `kibana` group, one at a time.
# Stops kibana and metricbeat, reboots the machine, then makes sure both
# services are running again.
- name: Restart Kibana Nodes
  hosts: kibana
  remote_user: root
  serial: 1
  tags: third
  tasks:
    # Announce which node is being processed; always reported as "changed".
    - name: "Start rolling restart for {{ansible_host}}"
      changed_when: true
      debug:
        msg: "Rolling restart of node {{ansible_hostname}} initiated"

    # Bring the services down before the reboot.
    - name: Stop service Kibana
      service:
        state: stopped
        name: kibana

    - name: Stop service metricbeat
      service:
        state: stopped
        name: metricbeat

    # Waits up to 3600 seconds for the machine to come back.
    - name: reboot system
      reboot:
        reboot_timeout: 3600
        msg: "Routine reboot initiated by Ansible"

    # Bring the services back up after the reboot.
    - name: Start service kibana, if not started
      service:
        state: started
        name: kibana

    - name: Start service metricbeat, if not started
      service:
        state: started
        name: metricbeat
# Play 4: final step after all reboots. Runs against one randomly chosen host
# of the `es_master` group: waits for the cluster to be green with the
# expected node count, then resets `cluster.routing.allocation.enable` to its
# default (null) so shard allocation is fully enabled again.
# NOTE: `validate_certs: false` disables TLS certificate verification for the
# Elasticsearch API calls.
- name: Finalizing Reboots
  hosts: "{{ (groups['es_master'] | shuffle)[0:1] }}"
  serial: 1
  remote_user: root
  tags: "fourth"
  tasks:
    - name: Check for cluster status until its Green
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/health?pretty"
        follow_redirects: none
        method: GET
        user: "<username>"
        password: "<password>"
        force_basic_auth: true
        validate_certs: false
      register: result
      # Change number of nodes to match your cluster.
      # `result.json` is absent when the request itself fails; the guard keeps
      # the task retrying instead of erroring on an undefined attribute.
      until: >-
        result.json is defined and
        result.json.status == "green" and
        result.json.number_of_nodes == 7
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds

    # Runs as a regular task right after the health check; no handler is
    # involved.
    - name: Re-enable shard allocation
      uri:
        url: "https://{{ansible_host}}:9200/_cluster/settings?pretty"
        follow_redirects: none
        method: PUT
        user: "<username>"
        password: "<password>"
        headers:
          Content-Type: application/json
        body: >
          { "persistent": { "cluster.routing.allocation.enable": null } }
        body_format: json
        force_basic_auth: true
        validate_certs: false
      register: result
      until: result.json is defined and result.json.acknowledged == true
      retries: 360  # 360 * 10 seconds = 1 hour
      delay: 10  # Every 10 seconds
      run_once: true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment