krisiye · April 29, 2025 15:19
diff --git a/postgresql_cpu_troubleshooting.sql b/postgresql_cpu_troubleshooting.sql
 -- tested with postgres13. Lower versions may need tweaks.

 -- Top 20 cpu heavy queries
 SELECT
    query AS short_query, 
    round((total_plan_time + total_exec_time)::numeric, 2),
    calls,
    rows,
    round((total_plan_time + total_exec_time)::numeric / calls, 2) AS avg_time,
    round((100 * (total_plan_time + total_exec_time) / sum((total_plan_time + total_exec_time)::numeric) OVER ())::numeric, 2) AS percentage_cpu
 FROM pg_stat_statements
 ORDER BY percentage_cpu DESC
 LIMIT 20;

 -- find tables that are getting hit with sequential scans and maybe missing indexes
 SELECT
    schemaname,
    relname,
    seq_scan,
    seq_tup_read,
    idx_scan,
    seq_tup_read / seq_scan AS avg
 FROM pg_stat_user_tables
 WHERE seq_scan > 0
 ORDER BY seq_tup_read DESC;

 -- An index is missing if both the second and third column are big.
 SELECT relname,
       seq_scan,
       seq_tup_read / seq_scan AS tup_per_scan
 FROM pg_stat_user_tables
 WHERE seq_scan > 0;

 -- get stats on all indexes and usage
 SELECT * 
 FROM pg_stat_all_indexes
 WHERE schemaname 
 NOT IN ('pg_catalog', 'information_schema') 
 AND schemaname !~ '^pg_toast' 
 order by idx_tup_read desc;

 -- Look for foreign key constraints that are missing indexes on the referencing table.
 -- Make sure to add an index to your foreign keys as, unlike other engines, 
 -- PostgreSQL does not do that for you automatically.

 CREATE FUNCTION pg_temp.sortarray(int2[]) returns int2[] as '
  SELECT ARRAY(
      SELECT $1[i]
        FROM generate_series(array_lower($1, 1), array_upper($1, 1)) i
    ORDER BY 1
  )
 ' language sql;

  SELECT conrelid::regclass
         ,conname
         ,reltuples::bigint
    FROM pg_constraint
         JOIN pg_class ON (conrelid = pg_class.oid)
   WHERE contype = 'f'
         AND NOT EXISTS (
           SELECT 1
             FROM pg_index
            WHERE indrelid = conrelid
                  AND pg_temp.sortarray(conkey) = pg_temp.sortarray(indkey)
         )
 ORDER BY reltuples DESC;

 -- find top 10 queries that spend most time in the database for PostgreSQL
 SELECT
    total_plan_time+total_exec_time as total_time, 
    query 
 FROM pg_stat_statements 
 ORDER BY 1 DESC
 LIMIT 10;

 -- find top 10 queries and their buffer cache hit ratio. You would want them closer to 100%.
 SELECT
    query, 
    calls, 
    total_plan_time+total_exec_time as total_time, 
    rows, 
    100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent 
 FROM pg_stat_statements
 ORDER BY 3 DESC
 LIMIT 10;

 -- cache hit ratio
 SELECT
    sum(heap_blks_read) as heap_read,
    sum(heap_blks_hit) as heap_hit,
    sum(heap_blks_hit)/(sum(heap_blks_hit) + sum(heap_blks_read)) as cache_hit_ratio
 FROM pg_statio_user_tables;

 -- table level cache hit vs disk hits
 with
 all_tables as
 (
 SELECT  *
 FROM    (
    SELECT  'all'::text as table_name,
        sum( (coalesce(heap_blks_read,0) + coalesce(idx_blks_read,0) + coalesce(toast_blks_read,0) + coalesce(tidx_blks_read,0)) ) as from_disk,
        sum( (coalesce(heap_blks_hit,0)  + coalesce(idx_blks_hit,0)  + coalesce(toast_blks_hit,0)  + coalesce(tidx_blks_hit,0))  ) as from_cache
    FROM    pg_statio_all_tables  --> change to pg_statio_USER_tables if you want to check only user tables (excluding postgres's own tables)
    ) a
 WHERE   (from_disk + from_cache) > 0 -- discard tables without hits
 ),
 tables as
 (
 SELECT  *
 FROM    (
    SELECT  relname as table_name,
        ( (coalesce(heap_blks_read,0) + coalesce(idx_blks_read,0) + coalesce(toast_blks_read,0) + coalesce(tidx_blks_read,0)) ) as from_disk,
        ( (coalesce(heap_blks_hit,0)  + coalesce(idx_blks_hit,0)  + coalesce(toast_blks_hit,0)  + coalesce(tidx_blks_hit,0))  ) as from_cache
    FROM    pg_statio_all_tables --> change to pg_statio_USER_tables if you want to check only user tables (excluding postgres's own tables)
    ) a
 WHERE   (from_disk + from_cache) > 0 -- discard tables without hits
 )
 SELECT  table_name as "table name",
    from_disk as "disk hits",
    round((from_disk::numeric / (from_disk + from_cache)::numeric)*100.0,2) as "% disk hits",
    round((from_cache::numeric / (from_disk + from_cache)::numeric)*100.0,2) as "% cache hits",
    (from_disk + from_cache) as "total hits"
 FROM    (SELECT * FROM all_tables UNION ALL SELECT * FROM tables) a
 ORDER   BY (case when table_name = 'all' then 0 else 1 end), from_disk desc;

 -- List blocked queries and blocking pids (note you may still see the last query in the session as the blocking query)
 SELECT
    activity.pid,
    activity.usename,
    activity.query,
    blocking.pid AS blocking_id,
    blocking.query AS blocking_query
 FROM pg_stat_activity AS activity
 JOIN pg_stat_activity AS blocking ON blocking.pid = ANY(pg_blocking_pids(activity.pid));

 -- view queries and lock status
 select 
    relname as relation_name, 
    query, 
    pg_locks.* 
 from pg_locks
 join pg_class on pg_locks.relation = pg_class.oid
 join pg_stat_activity on pg_locks.pid = pg_stat_activity.pid
	-- tested with postgres13. Lower versions may need tweaks.

	-- Top 20 cpu heavy queries
	SELECT
	query AS short_query,
	round((total_plan_time + total_exec_time)::numeric, 2),
	calls,
	rows,
	round((total_plan_time + total_exec_time)::numeric / calls, 2) AS avg_time,
	round((100 * (total_plan_time + total_exec_time) / sum((total_plan_time + total_exec_time)::numeric) OVER ())::numeric, 2) AS percentage_cpu
	FROM pg_stat_statements
	ORDER BY percentage_cpu DESC
	LIMIT 20;

	-- find tables that are getting hit with sequential scans and maybe missing indexes
	SELECT
	schemaname,
	relname,
	seq_scan,
	seq_tup_read,
	idx_scan,
	seq_tup_read / seq_scan AS avg
	FROM pg_stat_user_tables
	WHERE seq_scan > 0
	ORDER BY seq_tup_read DESC;

	-- An index is missing if both the second and third column are big.
	SELECT relname,
	seq_scan,
	seq_tup_read / seq_scan AS tup_per_scan
	FROM pg_stat_user_tables
	WHERE seq_scan > 0;

	-- get stats on all indexes and usage
	SELECT *
	FROM pg_stat_all_indexes
	WHERE schemaname
	NOT IN ('pg_catalog', 'information_schema')
	AND schemaname !~ '^pg_toast'
	order by idx_tup_read desc;

	-- Look for foreign key constraints that are missing indexes on the referencing table.
	-- Make sure to add an index to your foreign keys as, unlike other engines,
	-- PostgreSQL does not do that for you automatically.

	CREATE FUNCTION pg_temp.sortarray(int2[]) returns int2[] as '
	SELECT ARRAY(
	SELECT $1[i]
	FROM generate_series(array_lower($1, 1), array_upper($1, 1)) i
	ORDER BY 1
	)
	' language sql;

	SELECT conrelid::regclass
	,conname
	,reltuples::bigint
	FROM pg_constraint
	JOIN pg_class ON (conrelid = pg_class.oid)
	WHERE contype = 'f'
	AND NOT EXISTS (
	SELECT 1
	FROM pg_index
	WHERE indrelid = conrelid
	AND pg_temp.sortarray(conkey) = pg_temp.sortarray(indkey)
	)
	ORDER BY reltuples DESC;

	-- find top 10 queries that spend most time in the database for PostgreSQL
	SELECT
	total_plan_time+total_exec_time as total_time,
	query
	FROM pg_stat_statements
	ORDER BY 1 DESC
	LIMIT 10;

	-- find top 10 queries and their buffer cache hit ratio. You would want them closer to 100%.
	SELECT
	query,
	calls,
	total_plan_time+total_exec_time as total_time,
	rows,
	100.0 * shared_blks_hit / nullif(shared_blks_hit + shared_blks_read, 0) AS hit_percent
	FROM pg_stat_statements
	ORDER BY 3 DESC
	LIMIT 10;

	-- cache hit ratio
	SELECT
	sum(heap_blks_read) as heap_read,
	sum(heap_blks_hit) as heap_hit,
	sum(heap_blks_hit)/(sum(heap_blks_hit) + sum(heap_blks_read)) as cache_hit_ratio
	FROM pg_statio_user_tables;

	-- table level cache hit vs disk hits
	with
	all_tables as
	(
	SELECT *
	FROM (
	SELECT 'all'::text as table_name,
	sum( (coalesce(heap_blks_read,0) + coalesce(idx_blks_read,0) + coalesce(toast_blks_read,0) + coalesce(tidx_blks_read,0)) ) as from_disk,
	sum( (coalesce(heap_blks_hit,0) + coalesce(idx_blks_hit,0) + coalesce(toast_blks_hit,0) + coalesce(tidx_blks_hit,0)) ) as from_cache
	FROM pg_statio_all_tables --> change to pg_statio_USER_tables if you want to check only user tables (excluding postgres's own tables)
	) a
	WHERE (from_disk + from_cache) > 0 -- discard tables without hits
	),
	tables as
	(
	SELECT *
	FROM (
	SELECT relname as table_name,
	( (coalesce(heap_blks_read,0) + coalesce(idx_blks_read,0) + coalesce(toast_blks_read,0) + coalesce(tidx_blks_read,0)) ) as from_disk,
	( (coalesce(heap_blks_hit,0) + coalesce(idx_blks_hit,0) + coalesce(toast_blks_hit,0) + coalesce(tidx_blks_hit,0)) ) as from_cache
	FROM pg_statio_all_tables --> change to pg_statio_USER_tables if you want to check only user tables (excluding postgres's own tables)
	) a
	WHERE (from_disk + from_cache) > 0 -- discard tables without hits
	)
	SELECT table_name as "table name",
	from_disk as "disk hits",
	round((from_disk::numeric / (from_disk + from_cache)::numeric)*100.0,2) as "% disk hits",
	round((from_cache::numeric / (from_disk + from_cache)::numeric)*100.0,2) as "% cache hits",
	(from_disk + from_cache) as "total hits"
	FROM (SELECT * FROM all_tables UNION ALL SELECT * FROM tables) a
	ORDER BY (case when table_name = 'all' then 0 else 1 end), from_disk desc;

	-- List blocked queries and blocking pids (note you may still see the last query in the session as the blocking query)
	SELECT
	activity.pid,
	activity.usename,
	activity.query,
	blocking.pid AS blocking_id,
	blocking.query AS blocking_query
	FROM pg_stat_activity AS activity
	JOIN pg_stat_activity AS blocking ON blocking.pid = ANY(pg_blocking_pids(activity.pid));

	-- view queries and lock status
	select
	relname as relation_name,
	query,
	pg_locks.*
	from pg_locks
	join pg_class on pg_locks.relation = pg_class.oid
	join pg_stat_activity on pg_locks.pid = pg_stat_activity.pid