Skip to content

Instantly share code, notes, and snippets.

@kmoppel
Created January 21, 2025 08:55
Show Gist options
  • Save kmoppel/72eee9874ddce7bb14e56282eaadbffa to your computer and use it in GitHub Desktop.
Postgres parallel_leader_participation test
#!/bin/bash
# Launch a Spot VM with Postgres via pg_spot_operator, run the test playbook
# against it over Ansible, and tear the instance down on success.
#
# Usage: ./launch_vms_and_run_test.sh INSTANCE_ID REGION STORAGE_TYPE STORAGE_MIN
# Prerequisite: pipx install --include-deps ansible pg_spot_operator
# Details: https://github.com/pg-spot-ops/pg-spot-operator

if [ -z "$4" ]; then
  echo "Usage: ./launch_vms_and_run_test.sh INSTANCE_ID REGION STORAGE_TYPE STORAGE_MIN"
  echo "Example:"
  echo " ./launch_vms_and_run_test.sh run1-ebs eu-south-2 network 100"
  exit 1
fi

set -eu

# -p so that re-runs don't fail when the directories already exist
mkdir -p logs results

INSTANCE_ID="$1"  # lowercase + dashes only
REGION="$2"       # eu-south-2 (Spain) best in EU currently according to: pg_spot_operator --list-avg-spot-savings --region ^eu
STORAGE_TYPE="$3" # local | network
STORAGE_MIN="$4"  # in GB
CPU_MIN=16
RAM_MIN=32
PROVISIONED_VOLUME_THROUGHPUT=500 # Default 125 MBs is very limiting

T1=$(date +%s)

# Launch a Spot VM with Postgres, place Ansible connstr in $INSTANCE_ID.ini
echo "Starting the test VM $INSTANCE_ID in region $REGION ..."
pg_spot_operator --instance-name "$INSTANCE_ID" --region "$REGION" \
  --cpu-min "$CPU_MIN" --ram-min "$RAM_MIN" \
  --storage-type "$STORAGE_TYPE" --storage-min "$STORAGE_MIN" \
  --volume-throughput "$PROVISIONED_VOLUME_THROUGHPUT" \
  --connstr-only --connstr-format ansible \
  --os-extra-packages rsync > "$INSTANCE_ID.ini"

echo "VM OK - running Ansible ..."
# Export so the error message below can reference the log path too
# (the original set it only as a per-command env prefix, leaving it empty
# in the failure branch).
ANSIBLE_LOG_PATH="logs/ansible_${INSTANCE_ID}.log"
export ANSIBLE_LOG_PATH

# NB: under 'set -e' a plain "cmd; if [ $? -eq 0 ]" never reaches the check on
# failure - capture the return code explicitly so the error branch can run.
rv=0
ansible-playbook -i "$INSTANCE_ID.ini" playbook.yml || rv=$?

if [ "$rv" -eq 0 ]; then
  echo "Tests OK, shutting down the instance ..."
  pg_spot_operator --region "$REGION" --instance-name "$INSTANCE_ID" --teardown
else
  echo "ERROR: Ansible failed - check the log at $ANSIBLE_LOG_PATH"
  exit 1
fi

T2=$(date +%s)
DUR=$((T2-T1))
echo "Done in $DUR seconds"
#!/bin/bash
# Postgres parallel_leader_participation benchmark.
#
# For every pgbench scale x partition-count combination, generates test data,
# then times TEST_QUERY under each (parallel_leader_participation x
# max_parallel_workers_per_gather) setting, recording the pg_stat_statements
# numbers into public.pgss_results and finally dumping that table to
# $DUMP_FILE for collection by the playbook.
#
# Assumes runs as the "postgres" user + pg_stat_statements installed.
set -eu

CONNSTR_TESTDB="host=/var/run/postgresql dbname=postgres"
PGBENCH_SCALES="2000 5000" # RAM vs 2x RAM, assuming 32GB RAM
PGBENCH_INIT_FLAGS="-I dtgv --unlogged" # Don't need the PK
PGBENCH_PARTITIONS="0 8"
PGBENCH_DURATION=1800
PROTOCOL=prepared
PGBENCH_CLIENTS=1
TEST_QUERY="select bid, avg(abalance) from pgbench_accounts group by bid"
SQL_PGSS_SETUP="CREATE EXTENSION IF NOT EXISTS pg_stat_statements;"
SQL_PGSS_RESULTS_SETUP="CREATE TABLE IF NOT EXISTS public.pgss_results AS SELECT ''::text AS hostname, now() AS created_on, 0::numeric AS server_version_num, ''::text as random_page_cost, 0 AS scale, 0 AS duration, 0 AS clients, 0 AS partitions, ''::text AS parallel_leader_participation, 0::int max_parallel_workers_per_gather, mean_exec_time, stddev_exec_time, calls, rows, shared_blks_hit, shared_blks_read, blk_read_time, blk_write_time, query FROM public.pg_stat_statements WHERE false;"
SQL_PGSS_RESET="SELECT public.pg_stat_statements_reset();"
HOSTNAME=$(hostname)
DUMP_FILE="pgss.dump"

# Run a single SQL statement against the test DB, suppressing psql chatter
function exec_sql() {
  psql "$CONNSTR_TESTDB" -Xqc "$1"
}

START_TIME=$(date +%s)
# BUG FIX: the original queried an undefined $CONNSTR_RESULTSDB here
START_TIME_PG=$(psql "$CONNSTR_TESTDB" -qAXtc "select now();")

echo "Ensuring pg_stat_statements extension on result server and public.pgss_results table ..."
echo "Ensuring pg_stat_statements extension on test instance ..."
exec_sql "$SQL_PGSS_SETUP"
exec_sql "$SQL_PGSS_RESULTS_SETUP"

for SCALE in $PGBENCH_SCALES ; do
  for PARTS in $PGBENCH_PARTITIONS ; do
    echo -e "\n*** SCALE $SCALE ***\n"
    echo "Creating test data using pgbench ..."
    exec_sql "drop table if exists pgbench_accounts cascade"
    echo "pgbench -i -q $PGBENCH_INIT_FLAGS -s $SCALE --partitions $PARTS \"$CONNSTR_TESTDB\" &>/dev/null"
    pgbench -i -q $PGBENCH_INIT_FLAGS -s $SCALE --partitions $PARTS "$CONNSTR_TESTDB" &>/dev/null
    DBSIZE=$(psql "$CONNSTR_TESTDB" -XAtqc "select pg_size_pretty(pg_database_size(current_database()))")
    echo "DB size = $DBSIZE"

    for parallel_leader_participation in on off ; do
      for max_parallel_workers_per_gather in 2 4 8 16 ; do
        # Settings are sighup-level - a reload is enough, no restart needed
        exec_sql "alter system set parallel_leader_participation = $parallel_leader_participation;"
        exec_sql "alter system set max_parallel_workers_per_gather = $max_parallel_workers_per_gather;"
        exec_sql "select pg_reload_conf();"

        echo "Starting the test loop ..."
        echo "Reseting pg_stat_statements..."
        exec_sql "$SQL_PGSS_RESET" >/dev/null

        echo "Running the timed query test"
        echo "echo '$TEST_QUERY' | pgbench --random-seed 666 -M $PROTOCOL -c $PGBENCH_CLIENTS -T $PGBENCH_DURATION -f- \"$CONNSTR_TESTDB\""
        echo "$TEST_QUERY" | pgbench --random-seed 666 -M $PROTOCOL -c $PGBENCH_CLIENTS -T $PGBENCH_DURATION -f- "$CONNSTR_TESTDB"

        # Persist the pg_stat_statements numbers for this parameter combo
        exec_sql "insert into pgss_results select '${HOSTNAME}', now(), current_setting('server_version_num')::int, current_setting('random_page_cost'), ${SCALE}, ${PGBENCH_DURATION}, ${PGBENCH_CLIENTS}, $PARTS, '${parallel_leader_participation}', '${max_parallel_workers_per_gather}', mean_exec_time, stddev_exec_time, calls, rows, shared_blks_hit, shared_blks_read, blk_read_time, blk_write_time, query from public.pg_stat_statements where calls > 1 and query ~* '(INSERT|UPDATE|SELECT).*pgbench_accounts'"

        echo "Done with max_parallel_workers_per_gather=$max_parallel_workers_per_gather"
      done # max_parallel_workers_per_gather
      echo "Done with parallel_leader_participation=$parallel_leader_participation"
    done # parallel_leader_participation
    # BUG FIX: the original's closing comments / progress echoes for the
    # SCALE and PARTS loops were swapped - PARTS is the inner loop
    echo "Done with PARTS $PARTS"
  done # PARTS
  echo "Done with SCALE $SCALE"
done # SCALE

echo "Dumping pgss_results to $DUMP_FILE ..."
pg_dump -t pgss_results > "$DUMP_FILE"

END_TIME=$(date +%s)
echo -e "\nDONE in $((END_TIME-START_TIME)) s"
---
# Run the parallel_leader_participation benchmark script on every host in the
# inventory, then pull back the pgss_results dump and the run logs.
- name: Transfer and execute a script.
  hosts: all
  become: true
  become_user: postgres
  tasks:
    - name: Transfer the script
      # Dict form instead of the legacy inline key=value form, consistent
      # with the other tasks (ansible-lint no-free-form)
      ansible.builtin.copy:
        src: parallel_leader_participation_test.sh
        dest: /var/lib/postgresql/parallel_leader_participation_test.sh
        mode: '0700'

    - name: Run the test script
      ansible.builtin.shell: /var/lib/postgresql/parallel_leader_participation_test.sh &> parallel_leader_participation_test_`date +%s`.log
      args:
        executable: /bin/bash  # '&>' redirection requires bash, not sh
        chdir: /var/lib/postgresql

    - name: Storing in a path relative to the playbook
      ansible.builtin.fetch:
        src: /var/lib/postgresql/pgss.dump
        dest: ./results/{{ inventory_hostname }}_pgss.dump
        flat: true
        fail_on_missing: true

    - name: Pull run logs just in case
      # rsync (installed via --os-extra-packages) pulls glob-matched logs,
      # which fetch cannot do
      ansible.posix.synchronize:
        src: /var/lib/postgresql/parallel_leader_*.log
        dest: logs/
        mode: pull
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment