gokhanozdemir/directus-cleanup-orphan-rows.sql

gokhanozdemir · 2025-01-14T17:23:58Z

DO $$ 
DECLARE
    table_record RECORD;
    duplicate_count INTEGER;
    constraint_name TEXT;
    has_primary_key BOOLEAN;
BEGIN
    -- Loop through all tables in the current schema
    FOR table_record IN 
        SELECT table_schema, table_name 
        FROM information_schema.tables 
        WHERE table_schema = 'public' 
        AND table_type = 'BASE TABLE'
    LOOP
        -- Check if 'id' column exists
        IF EXISTS (
            SELECT 1 
            FROM information_schema.columns 
            WHERE table_schema = table_record.table_schema 
            AND table_name = table_record.table_name 
            AND column_name = 'id'
        ) THEN
            -- Check if 'id' is already a primary key
            SELECT EXISTS (
                SELECT 1 
                FROM information_schema.table_constraints tc
                JOIN information_schema.key_column_usage kcu 
                    ON tc.constraint_name = kcu.constraint_name
                    AND tc.table_schema = kcu.table_schema
                WHERE tc.constraint_type = 'PRIMARY KEY'
                AND kcu.column_name = 'id'
                AND tc.table_schema = table_record.table_schema
                AND tc.table_name = table_record.table_name
            ) INTO has_primary_key;

            -- Check for duplicates regardless of primary key status
            EXECUTE format('
                SELECT COUNT(*) 
                FROM (
                    SELECT id
                    FROM %I.%I
                    GROUP BY id
                    HAVING COUNT(*) > 1
                ) t', 
                table_record.table_schema, table_record.table_name) 
            INTO duplicate_count;

            IF duplicate_count > 0 THEN
                RAISE NOTICE 'Found % duplicate ID groups in %.%', 
                    duplicate_count, 
                    table_record.table_schema, 
                    table_record.table_name;

                -- Remove duplicates keeping the first occurrence
                EXECUTE format('
                    DELETE FROM %I.%I 
                    WHERE ctid NOT IN (
                        SELECT MIN(ctid)
                        FROM %I.%I
                        GROUP BY id
                    )', 
                    table_record.table_schema, table_record.table_name,
                    table_record.table_schema, table_record.table_name
                );
                
                RAISE NOTICE 'Removed duplicates from %.%', 
                    table_record.table_schema, 
                    table_record.table_name;
            END IF;

            -- Only add primary key if it doesn't exist
            IF NOT has_primary_key THEN
                -- Generate a unique constraint name
                constraint_name := table_record.table_name || '_id_pkey';

                -- Add primary key constraint
                EXECUTE format('
                    ALTER TABLE %I.%I 
                    ADD CONSTRAINT %I PRIMARY KEY (id)', 
                    table_record.table_schema, 
                    table_record.table_name,
                    constraint_name
                );

                RAISE NOTICE 'Added primary key constraint on id column for %.%', 
                    table_record.table_schema, 
                    table_record.table_name;
            ELSE
                RAISE NOTICE 'Table %.% already has a primary key on id column - skipping constraint creation', 
                    table_record.table_schema, 
                    table_record.table_name;
            END IF;
        ELSE
            RAISE NOTICE 'Table %.% does not have an id column', 
                table_record.table_schema, 
                table_record.table_name;
        END IF;
    END LOOP;
END $$;

gokhanozdemir · 2025-01-14T17:25:13Z

Directus activity table index auto increment is probably broken. Fix it.

DO $$
DECLARE
    latest_id INT;
BEGIN
    -- Get the latest id from the directus_activity table
    EXECUTE 'SELECT COALESCE(MAX(id), 0) FROM directus_activity' INTO latest_id;

    -- Set the next value of the sequence for the id column
    EXECUTE format('
        SELECT setval(pg_get_serial_sequence(%L, ''id''), %s)
    ', 'directus_activity', latest_id + 1);
END $$;

gokhanozdemir · 2025-01-14T17:27:39Z

DO $$ 
DECLARE
    table_record RECORD;
    index_exists BOOLEAN;
    broken_index_name TEXT;
BEGIN
    -- Loop through all tables in the current schema
    FOR table_record IN 
        SELECT table_schema, table_name 
        FROM information_schema.tables 
        WHERE table_schema = 'public' 
        AND table_type = 'BASE TABLE'
    LOOP
        -- Check if 'id' column exists
        IF EXISTS (
            SELECT 1 
            FROM information_schema.columns 
            WHERE table_schema = table_record.table_schema 
            AND table_name = table_record.table_name 
            AND column_name = 'id'
        ) THEN
            -- Find any existing broken index on id column
            SELECT pi.indexname INTO broken_index_name
            FROM pg_index i
            JOIN pg_class c ON c.oid = i.indrelid
            JOIN pg_class ic ON ic.oid = i.indexrelid
            JOIN pg_indexes pi ON pi.indexname = ic.relname
            WHERE c.relname = table_record.table_name
            AND pi.schemaname = table_record.table_schema
            AND pi.indexdef LIKE '%id%'
            AND i.indisvalid = false;

            IF broken_index_name IS NOT NULL THEN
                -- Drop the broken index
                EXECUTE format('DROP INDEX IF EXISTS %I.%I CASCADE',
                    table_record.table_schema,
                    broken_index_name
                );
                
                RAISE NOTICE 'Dropped broken index % on %.%',
                    broken_index_name,
                    table_record.table_schema,
                    table_record.table_name;

                -- Recreate the index
                EXECUTE format('
                    CREATE INDEX %I ON %I.%I (id)',
                    table_record.table_name || '_id_idx',
                    table_record.table_schema,
                    table_record.table_name
                );

                RAISE NOTICE 'Created new index on id column for %.%',
                    table_record.table_schema,
                    table_record.table_name;

                -- If this is a primary key, also reindex it
                IF EXISTS (
                    SELECT 1 
                    FROM information_schema.table_constraints tc
                    WHERE tc.constraint_type = 'PRIMARY KEY'
                    AND tc.table_schema = table_record.table_schema
                    AND tc.table_name = table_record.table_name
                ) THEN
                    EXECUTE format('REINDEX TABLE %I.%I',
                        table_record.table_schema,
                        table_record.table_name
                    );
                    
                    RAISE NOTICE 'Reindexed primary key for %.%',
                        table_record.table_schema,
                        table_record.table_name;
                END IF;
            END IF;
        END IF;
    END LOOP;
END $$;


	-- Directus Database Cleanup Script
	-- Purpose: Remove orphaned data and optimize database performance in Directus CMS
	-- Tested on: Directus 11.2.2 Postgres 16.0
	-- WARNING: Always backup your database before running cleanup scripts!
	-- DISCLAIMER: Use at your own risk!

	/**
	* Background:
	* During CMS development, especially with Directus, database structure changes frequently.
	* This can lead to:
	* - Accumulated orphaned data from deleted collections
	* - Unnecessary database bloat from logs and revisions
	* - Decreased query performance
	* - Storage space issues, especially on free-tier platforms
	*
	* Prerequisites:
	* 1. Create a complete backup of your database
	* 2. Test on a non-production environment first
	* 3. Consider disabling activity tracking on collections before running
	* 4. Ensure Directus is not running during cleanup
	* 5. Delete any unused collections before running (if they still exist in the database but not visible on Directus)
	* Note: Re-enabling tracking may cause errors (not tested)
	*
	* Usage Notes:
	* - Script removes activity logs and revisions for non-existent collections
	* - Performs database optimization after cleanup
	* - Particularly useful when approaching storage limits

	* Motivation:
	I had ~1 GB of SQL data, primarily consisting of logs. After running this script, and the one below my database size reduced to ~300 MB.Yours may vary.
	* This effort is inspired from: https://gist.github.com/madc/cee04370227a12127361a615d2ae8678
	* Thank you for sharing your sql and inspiring me research more on this topic.@macd
	*/

	-- Step 1: Remove orphaned activity logs
	-- Deletes entries from directus_activity where referenced collections no longer exist
	DELETE FROM public.directus_activity
	WHERE collection NOT IN (
	SELECT tablename
	FROM pg_tables
	WHERE schemaname = 'public'
	);

	-- Step 2: Clean revision history
	-- Removes revision records for deleted collections
	DELETE FROM public.directus_revisions
	WHERE collection NOT IN (
	SELECT tablename
	FROM pg_tables
	WHERE schemaname = 'public'
	);

	-- Step 3: Optimize database
	-- Reclaims storage space and updates query planner statistics
	VACUUM ANALYZE;

	/**
	* Post-Execution:
	* 1. Verify system functionality
	* 2. Monitor system performance
	* 3. Consider implementing regular cleanup schedule via flows
	*/

gokhanozdemir/directus-cleanup-orphan-rows.sql

gokhanozdemir commented Jan 14, 2025 •

edited

Loading

Uh oh!

gokhanozdemir commented Jan 14, 2025 •

edited

Loading

Uh oh!

gokhanozdemir commented Jan 14, 2025

Uh oh!

gokhanozdemir/directus-cleanup-orphan-rows.sql

gokhanozdemir commented Jan 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

gokhanozdemir commented Jan 14, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

gokhanozdemir commented Jan 14, 2025

Uh oh!

gokhanozdemir commented Jan 14, 2025 •

edited

Loading

gokhanozdemir commented Jan 14, 2025 •

edited

Loading