Created
May 13, 2025 11:31
-
-
Save tamasgal/b304c80a7692514326b6c9785aec7ef0 to your computer and use it in GitHub Desktop.
A script to delete unused CI cache folders on GitLab CI cache volumes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import os
import re
import shutil
from collections import defaultdict
from datetime import datetime, timedelta
import argparse

# Command-line interface. Arguments are parsed at module level, so the values
# below are available as module globals to main() and the helper functions.
parser = argparse.ArgumentParser(description="Clean up old GitLab CI cache folders based on their cycle number and age.")
parser.add_argument('--cache-dir', help='Path to the GitLab CI cache directory', required=True)
parser.add_argument('--max-age', type=int, help='Maximum age (in days) before deletion', required=True)
parser.add_argument('--dry-run', action='store_true', help='Simulate actions without deleting anything')
args = parser.parse_args()

# A negative age would move the cut-off into the future, which makes every
# directory count as "too old" and wipes even the newest caches.
if args.max_age < 0:
    parser.error("--max-age must be a non-negative number of days")

cache_dir = args.cache_dir                # root directory that is walked
max_age = timedelta(days=args.max_age)    # age limit applied to the newest cache of a group
dry_run = args.dry_run                    # when set, nothing is removed from disk
now = datetime.now()                      # single reference time for all age checks
def main():
    """Delete superseded and expired cache directories below ``cache_dir``.

    Directories named ``<cache_name>-<cycle_number>-<protected|non_protected>``
    are grouped by parent directory, cache name and status. Within each group
    only the directory with the highest cycle number is kept, and even that
    one is removed when its modification time is older than ``max_age``.

    Reads the module-level ``cache_dir``, ``max_age`` and ``dry_run`` values.
    With ``dry_run`` set, the planned deletions and their sizes are printed
    but nothing is removed from disk.
    """
    # Regex with capture groups: (cache_name)-(cycle_number)-(status)
    pattern = re.compile(r'\b([\w\-]+)-(\d+)-(non_protected|protected)\b')
    grouped_dirs = defaultdict(list)

    # Collect matching directories
    for root, dirs, _files in os.walk(cache_dir):
        unmatched = []
        for d in dirs:
            match = pattern.fullmatch(d)
            if match is None:
                unmatched.append(d)
                continue
            name, number, status = match.groups()
            # The parent directory is part of the key: equally named caches
            # that live in different directories must not be ranked against
            # each other, otherwise all but one of them would be deleted.
            key = (root, name, status)
            grouped_dirs[key].append((int(number), os.path.join(root, d)))
        # A matched directory is kept or removed as a whole, so do not descend
        # into it. This also avoids handling paths that vanished together with
        # an already deleted parent.
        dirs[:] = unmatched

    # sizes in byte
    total_reclaimed = 0
    prefix = '[DRY-RUN] ' if dry_run else ''

    # Process and clean up
    for entries in grouped_dirs.values():
        entries.sort(reverse=True)  # highest number first
        _, latest_path = entries[0]
        print(f"Checking: {os.path.basename(latest_path)}")

        # Delete all others
        for _, path in entries[1:]:
            size = get_dir_size(path)
            total_reclaimed += size
            print(f" {prefix}Deleting: {path} ({format_size(size)})")
            if not dry_run:
                # Best effort: a directory that cannot be removed is skipped.
                shutil.rmtree(path, ignore_errors=True)

        if is_older_than(latest_path, max_age):
            size = get_dir_size(latest_path)
            total_reclaimed += size
            print(f" {prefix}Deleting (due to age): {latest_path} ({format_size(size)})")
            if not dry_run:
                shutil.rmtree(latest_path, ignore_errors=True)
        else:
            print(f" Keeping: {latest_path}")

    print(f"\nTotal space reclaimed: {format_size(total_reclaimed)}")
    if dry_run:
        print("\n[Dry-run mode: No actual deletions were made.]")
def get_dir_size(path):
    """Return the total size in bytes of the regular files below *path*.

    Symbolic links are not counted: removing the tree only removes the link
    itself, so the size of the link target would overstate the space that is
    actually freed. Files that disappear or become unreadable while the tree
    is being scanned are skipped. A missing *path* yields ``0``.
    """
    total = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            if os.path.islink(fp) or not os.path.isfile(fp):
                continue
            try:
                total += os.path.getsize(fp)
            except OSError:
                # The file vanished between the check and the stat call.
                continue
    return total
def format_size(bytes_size):
    """Render a byte count with two decimals and a binary unit suffix.

    The value is divided by 1024 until it drops below 1024 or the units up to
    TB are exhausted; anything larger is reported in PB.
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    value = bytes_size
    index = 0
    while index < len(units):
        if value < 1024:
            return f"{value:.2f} {units[index]}"
        value = value / 1024
        index += 1
    return f"{value:.2f} PB"
def is_older_than(path, delta):
    """Tell whether *path* was last modified more than *delta* before ``now``.

    ``now`` is the module-level reference time, so every path checked during
    one run is compared against the same instant.
    """
    modified_at = datetime.fromtimestamp(os.path.getmtime(path))
    cutoff = now - delta
    return modified_at < cutoff
# Run the clean-up only when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment