Skip to content

Instantly share code, notes, and snippets.

@tamasgal
Created May 13, 2025 11:31
Show Gist options
  • Save tamasgal/b304c80a7692514326b6c9785aec7ef0 to your computer and use it in GitHub Desktop.
Save tamasgal/b304c80a7692514326b6c9785aec7ef0 to your computer and use it in GitHub Desktop.
A script to delete unused CI cache folders on GitLab CI cache volumes
#!/usr/bin/env python3
import os
import re
import shutil
from collections import defaultdict
from datetime import datetime, timedelta
import argparse
# Command-line interface: all three options are read once at start-up and
# exposed as module-level settings used by the functions below.
parser = argparse.ArgumentParser(
    description="Clean up old GitLab CI cache folders based on their cycle number and age.",
)
parser.add_argument(
    '--cache-dir',
    required=True,
    help='Path to the GitLab CI cache directory',
)
parser.add_argument(
    '--max-age',
    required=True,
    type=int,
    help='Maximum age (in days) before deletion',
)
parser.add_argument(
    '--dry-run',
    action='store_true',
    help='Simulate actions without deleting anything',
)
args = parser.parse_args()

# Root directory that is scanned for cache folders.
cache_dir = args.cache_dir
# Age threshold as a timedelta so it can be subtracted from ``now``.
max_age = timedelta(days=args.max_age)
# When true, actions are only reported.
dry_run = args.dry_run
# Single reference timestamp (naive local time) for every age comparison.
now = datetime.now()
def main():
    """Delete superseded and expired cache directories below ``cache_dir``.

    Directories whose name has the form
    ``<cache_name>-<cycle_number>-<protected|non_protected>`` are collected
    and grouped by parent directory, cache name and status. Within each
    group every directory except the one with the highest cycle number is
    deleted; the remaining (latest) directory is deleted too when
    ``is_older_than`` reports it older than ``max_age``, otherwise it is
    kept.

    Reads the module-level settings ``cache_dir``, ``max_age`` and
    ``dry_run``. With ``dry_run`` set nothing is removed; the same report
    is printed and the total shows what would be reclaimed.
    """
    grouped_dirs = defaultdict(list)

    # Regex with capture groups: (cache_name)-(cycle_number)-(status)
    pattern = re.compile(r'\b([\w\-]+)-(\d+)-(non_protected|protected)\b')

    # Collect matching directories
    for root, dirs, _files in os.walk(cache_dir):
        descend_into = []
        for d in dirs:
            match = pattern.fullmatch(d)
            if match is None:
                descend_into.append(d)
                continue
            name, number, status = match.groups()
            # The parent directory is part of the key: caches that merely
            # share a name but live in different directories (presumably
            # different projects -- confirm against the runner's layout)
            # must not supersede one another.
            key = (root, name, status)
            grouped_dirs[key].append((int(number), os.path.join(root, d)))
        # Prune the walk in place: a matched directory is handled as one
        # unit, so there is no need to scan its contents, and nested
        # matches would otherwise be counted (and deleted) twice.
        dirs[:] = descend_into

    # Sizes in bytes
    total_reclaimed = 0
    prefix = '[DRY-RUN] ' if dry_run else ''

    # Process and clean up
    for entries in grouped_dirs.values():
        entries.sort(reverse=True)  # highest cycle number first
        _, latest_path = entries[0]
        print(f"Checking: {os.path.basename(latest_path)}")

        # Everything below the highest cycle number is superseded.
        for _, path in entries[1:]:
            size = get_dir_size(path)
            total_reclaimed += size
            print(f" {prefix}Deleting: {path} ({format_size(size)})")
            if not dry_run:
                # Best effort: a failure on one directory must not stop
                # the rest of the cleanup.
                shutil.rmtree(path, ignore_errors=True)

        if is_older_than(latest_path, max_age):
            size = get_dir_size(latest_path)
            total_reclaimed += size
            print(f" {prefix}Deleting (due to age): {latest_path} ({format_size(size)})")
            if not dry_run:
                shutil.rmtree(latest_path, ignore_errors=True)
        else:
            print(f" Keeping: {latest_path}")

    print(f"\nTotal space reclaimed: {format_size(total_reclaimed)}")
    if dry_run:
        print("\n[Dry-run mode: No actual deletions were made.]")
def get_dir_size(path):
    """Return the total size in bytes of the regular files below *path*.

    Symbolic links are not counted: ``os.path.getsize`` would report the
    size of the link target, which is not freed when the link is removed.
    Files that cannot be stat'ed (for example because they disappear while
    the tree is being scanned) are skipped instead of raising. A *path*
    that does not exist yields 0.
    """
    total = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            if os.path.islink(file_path) or not os.path.isfile(file_path):
                continue
            try:
                total += os.path.getsize(file_path)
            except OSError:
                # The file vanished or became unreadable between the
                # check above and the stat call; ignore it.
                continue
    return total
def format_size(bytes_size):
    """Format a byte count with two decimals and a binary-scaled unit.

    The value is divided by 1024 until it drops below 1024 or the units
    up to TB are exhausted; anything larger is reported in PB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = bytes_size
    index = 0
    while index < len(units):
        if value < 1024:
            return f"{value:.2f} {units[index]}"
        value = value / 1024
        index += 1
    return f"{value:.2f} PB"
def is_older_than(path, delta):
    """Return True when *path* was last modified more than *delta* ago.

    The modification time of *path* is compared against the module-level
    reference time ``now`` minus *delta*.
    """
    modified_at = datetime.fromtimestamp(os.path.getmtime(path))
    cutoff = now - delta
    return modified_at < cutoff
# Run the cleanup only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment