Skip to content

Instantly share code, notes, and snippets.

@kirpachov
Last active May 9, 2025 17:12
Show Gist options
  • Save kirpachov/06ec1fd15d553a27eb0481eebcbf4574 to your computer and use it in GitHub Desktop.
Save kirpachov/06ec1fd15d553a27eb0481eebcbf4574 to your computer and use it in GitHub Desktop.
Scripts to setup postgresql with pitr on s3 + disaster recovery
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "AllowWriteTobackDBBidone",
"Effect": "Allow",
"Action": [
"s3:PutObject",
"s3:GetObjectAcl",
"s3:GetObject",
"s3:ListBucketMultipartUploads",
"s3:AbortMultipartUpload",
"s3:DeleteObjectVersion",
"s3:DeleteObject",
"s3:GetBucketAcl",
"s3:GetBucketLocation",
"s3:PutObjectAcl",
"s3:ReplicateDelete"
],
"Resource": [
"arn:aws:s3:::bak-db/bidone/*"
]
},
{
"Sid": "AllowListBucketsBackDB",
"Effect": "Allow",
"Action": [
"s3:ListBucket"
],
"Resource": [
"arn:aws:s3:::bak-db"
]
}
]
}

Production database must be a postgresql configured with wal-g and s3.

Copy locally a production database

In questo esempio specifico riproduciamo il database di lime3 (q.opinioni.net) in locale.

0. Generic requirements configuration

  • local postgresql installation
  • wal-g configured. Verify wal-g configuration with wal-g backup-list
  • aws iam user with s3 read permissions

1. Create a local postgres cluster

pg_createcluster 14 lime3

2. Download base backup

(you must have wal-g configured)

# List available base backups
wal-g backup-list

# select the one you need and download it.
# you may want to move the empty cluster folder somewhere else
# mv /var/lib/postgresql/14/lime3 /tmp/
wal-g backup-fetch /var/lib/postgresql/14/lime3 base_00000001000002D100000075

3. Configure recovery mode

# enable recovery mode: depends on postgresql version.
# postgresql 14 works like this:
touch /var/lib/postgresql/14/lime3/recovery.signal

# vim /etc/postgresql/14/lime3/postgresql.conf
max_connections = <same-as-production>
shared_buffers = <same-as-production>
restore_command = 'wal-g wal-fetch %f %p >> /tmp/wal.log 2>&1'
recovery_target_time = '2025-05-04 11:49:00.000000+00'
recovery_target_action = 'pause'

4. (optional) Monitors

# Terminal 1: cluster status
watch -cwt sudo pg_lsclusters

# Terminal 2: progress and errors
sudo tail -f /var/log/postgresql/postgresql-14-lime3.log

5. Start cluster

pg_ctlcluster 14 lime3 start
# /etc/crontab
# File system backup - add 5 minute delay between
0 6 * * * root aws s3 cp /etc/postgresql/ s3://bak-fs/bidone/etc/postgresql/ --recursive
5 6 * * * root aws s3 cp /home/ s3://bak-fs/bidone/home/ --recursive
10 6 * * * root aws s3 cp /var/log/ s3://bak-fs/bidone/var/log/ --recursive
# Push postgres base backup
0 1 * * * postgres [ $(date +\%u) -eq 7 ] && PGHOST=/var/run/postgresql wal-g backup-push /var/lib/postgresql/14/main
# Delete old base backups
0 2 * * * postgres wal-g delete --confirm retain 5
#!/bin/bash
set -e
set -u
DIR="/tmp/lime3-debug/"
prepare(){
mkdir -p $DIR
cd $DIR
sudo pg_ctlcluster 14 lime3 start
}
cleanup(){
rm -rf $DIR
}
deduplicate_csv(){
cat $DIR/out.csv | sort | uniq > $DIR/out-dedup.csv
mv $DIR/out-dedup.csv $DIR/out.csv
}
# Timestamp format: 2025-05-04 HH:mm:ss.000000+00
current_timestamp(){
sudo cat /etc/postgresql/14/lime3/postgresql.conf | grep recovery_target_time | grep -v "#recovery_target_timeline" | sed "s/#.*//;s/recovery_target_time =//;s/[']//g;s/[\t]$//g;s/^[ \t]*//;"
}
# Timestamp in format HH:mm:ss
formatted_timestamp() {
echo $(current_timestamp) | sed 's/2025-05-04 //;s/.000000+00//'
}
# TODO include heders when first row
print_row(){
psql -U postgres -p 5434 lime3 -c "COPY(SELECT '$(formatted_timestamp)', * FROM lime_survey_651468 WHERE id = 1968) TO STDOUT WITH (FORMAT CSV, FORCE_QUOTE *);"
}
print_headers(){
psql -U postgres -p 5434 lime3 -c "COPY(SELECT * FROM lime_survey_651468 WHERE false) TO STDOUT WITH (FORMAT CSV, HEADER TRUE, FORCE_QUOTE *);"
}
# Export row from db. Filename is current timestamp.
export_row() {
sudo su - postgres -c "echo $(print_row)" >> $DIR/out.csv
}
update_timestamp(){
new_timestamp="$@"
if [[ -z "$new_timestamp" ]]; then
echo "No timestamp provided. Exiting."
exit 1
fi
echo "Updating version to $new_timestamp"
sudo sed -i "s/recovery_target_time = '.*'/recovery_target_time = '$new_timestamp'/" /etc/postgresql/14/lime3/postgresql.conf
}
restart_postgres(){
sudo pg_ctlcluster 14 lime3 stop && sudo pg_ctlcluster 14 lime3 start
}
wait_postgres_ready() {
set +e
while true; do
echo "Verifica stato PostgreSQL..."
pg_isready -h localhost -p 5434 -U postgres > /dev/null 2>&1
pg_ready=$?
sudo pg_ctlcluster 14 lime3 status | grep -q "esecuzione"
cluster_status=$?
if [[ $pg_ready -eq 0 && $cluster_status -eq 0 ]]; then
echo "✅ PostgreSQL è pronto e il cluster è online."
break
else
echo "⏳ In attesa che PostgreSQL sia pronto... (ready: $pg_ready, cluster: $cluster_status)"
sleep 5
fi
done
set -e
}
run_for_timestamp(){
timestamp=$1
if [[ -z "$timestamp" ]]; then
echo "No timestamp provided. Exiting."
exit 1
fi
echo "Running for timestamp: $timestamp."
update_timestamp "$timestamp"
restart_postgres
wait_postgres_ready
export_row
}
run_from_file(){
prepare
file=$1
echo "Running for file: $file"
exec 3< "$file"
print_headers > $DIR/out.csv
while IFS= read -r line <&3; do
run_for_timestamp "$line"
# read -p "Are you sure? " -n 1 -r
# echo # (optional) move to a new line
# if [[ $REPLY =~ ^[Yy]$ ]]
# then
# echo "AA Running for timestamp: $line"
# fi
done
# Chiude il file descriptor
exec 3<&-
}
fresh_start(){
sudo service postgresql stop
sudo rm -rf /var/lib/postgresql/14/lime3/
sudo cp -r '/var/lib/postgresql/14/lime3-2025-05-04 13:49:00.085883+02' /var/lib/postgresql/14/lime3
sudo chown -R postgres:postgres /var/lib/postgresql/14/lime3
update_timestamp "2025-05-04 11:49:50.000000+00"
cleanup
}
# ################
# Preparation:
# ################
# - Create a base backup located at /var/lib/postgresql/14/lime3-2025-05-04 13:49:00.085883+02
# - configure /etc/postgresql/14/lime3/postgresql.conf with the following lines:
# max_connections = 500
# max_locks_per_transaction = 512
# recovery_target_action = pause
# recovery_target_time = '2025-05-04 11:50:05.000000+00'
# restore_command = 'wal-g wal-fetch %f %p >> /tmp/wal.log 2>&1'
# shared_buffers = 128MB
# (still preparation)
# ./debug-db.sh fresh_start
# ################
# Usage:
# ################
# ./debug-db.sh run_from_file /tmp/timestamps
$@
# 1. Install postgres (same version as master) - DON'T CONFIGURE IT AS MASTER YET - see above
# 2. Install wal-g and configure it - see above
# 3. Select the backup you want to apply from the list of the backups:
wal-g backup-list
# 3.5 move actual main to some other directory
# 4. Fetch the backup from s3. <dest-folder> may be /var/lib/postgresql/14/<backup_name>
wal-g backup-fetch /var/lib/postgresql/14/main <backup_name>
touch /var/lib/postgresql/14/main/recovery.signal
# stop postgres
service postgresql stop
vim /etc/postgresql/14/main/postgresql.conf
# data_directory = '<dest-folder>'
# restore_command = 'wal-g wal-fetch %f %p >> /tmp/wal.log 2>&1'
# recovery_target = 'immediate'
# recovery_target_action = 'promote'
# (optional) recovery_target_time = '2020-07-27 01:23:00.000000+00'
# NOTE: Max connections parameter must be as the MASTER configs was
service postgresql start

Fail2ban configuration

Assuming fail2ban is installed and current user is root

cp /etc/fail2ban/jail.conf /etc/fail2ban/jail.local

vim /etc/fail2ban/jail.local

# bantime.increment = true
# ignoreip = 127.0.0.1/8 ::1 <office-ip>

vim /etc/fail2ban/jail.d/postgresql.local

[postgresql]
enabled = true
port = 5432
logpath = /var/log/postgresql/*.log

vim /etc/fail2ban/filter.d/postgresql.local

# Fail2ban filter for postgreSQL

[Definition]

failregex = FATAL:  no pg_hba.conf entry for host "<HOST>"

datepattern = ^\[%%Y/%%b/%%d %%H:%%M:%%S\]
# Login as root
# Note that we're installing postgresql 14 here.
# Common utilities
apt update && apt dist-upgrade -y && apt install -y fail2ban haveged net-tools iftop htop curl iotop gnupg gnupg2 gnupg1 wget vim lsb-release zip unzip ntpsec-ntpdate
# reboot may be needed a this point.
## Installing postgresql
sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
apt update && apt install -y postgresql-14
# or just 'postgresql' if you want the latest version.
# --- Postgres installation is done. ---
# Continue only if you're creating a MASTER cluster. If you're recovering from existing databases, see disaster_recovery.sh
# Note that the following configurations are for a 16 GB Memory / 8 AMD vCPUs / 320 GB Disk ubuntu 24 machine specific for postgres 14.
vim /etc/postgresql/14/main/postgresql.conf
# listen_addresses = '*'
# max_connections = 500
# ssl = on
# (?) ssl_ca_file =
# (?) ssl_cert_file =
# (?) ssl_key_file =
# shared_buffers = 8GB
# temp_buffers = 64MB
# maintenance_work_mem = 128MB
# (?) dynamic_shared_memory_type = posix
# random_page_cost = 1.0
# seq_page_cost = 1.0
# log_min_duration_statement = 200
# log_line_prefix = '%m [%p] %q%u@%d '
# log_timezone = 'localtime'
# datestyle = 'iso, mdy'
# timezone = 'localtime'
# lc_messages = 'C.UTF-8'
# lc_monetary = 'C.UTF-8'
# lc_numeric = 'C.UTF-8'
# lc_time = 'C.UTF-8'
# default_text_search_config = 'pg_catalog.english'
# To enable fail2ban, you'll need to log the ip of the connection %h:
# log_connections = on
# log_line_prefix = '%m {%h} [%p] %q%u@%d '
# https://www.postgresql.org/docs/14/continuous-archiving.html
# wal_level = replica
# max_wal_size = 1GB # default
# min_wal_size = 80MB # default
# archive_mode = on
# archive_command = '/usr/local/bin/wal-g wal-push %p'
# archive_timeout = 300
# --- Postgresql configuration is done. ---
# Remember to push to s3 periodic base backups and delete old. You may do it with
vim /etc/crontab
0 6 * * mon postgres PGHOST=/var/run/postgresql wal-g backup-push /var/lib/postgresql/14/main
0 2 * * * postgres wal-g delete --confirm retain 5
# For postgres 10, replication works like this:
standby_mode = 'on'
primary_conninfo = 'host=<mater-ip> port=5432 user=bidone_replication'
restore_command = 'wal-g --config /var/lib/postgresql/.walg.patata.json wal-fetch %f %p >> /tmp/wal.log 2>&1'
archive_cleanup_command = 'pg_archivecleanup /tmp/ %r'

Setup Postgresql database replica

Per migrare da un database ad un altro può essere necessario prima di scambiare le connessioni creare il nuovo database come "replica" dell'originale. In questo modo, al momento della migrazione i due database saranno identici e permetterà downtime pari a zero.

Nel master

Il 'master' è il database attualmente in uso.

... TODO ...

# https://github.com/wal-g/wal-g/blob/master/docs/PostgreSQL.md
# as root
# Check latest version at https://github.com/wal-g/wal-g/releases
wget https://github.com/wal-g/wal-g/releases/download/v3.0.3/wal-g-pg-ubuntu-20.04-amd64 -O /usr/local/bin/wal-g
chmod +x /usr/local/bin/wal-g
su - postgres
# Check if you can run wal-g as non-administrator user.
vim $HOME/.walg.json
{
"AWS_ACCESS_KEY_ID": "<key>",
"AWS_REGION": "eu-central-1",
"AWS_SECRET_ACCESS_KEY": "<secret>",
"WALG_S3_PREFIX": "s3://pitr-demetra/bidone"
}
# List commands with
wal-g --help
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment