Created
March 26, 2026 19:52
-
-
Save ronco/195aabfa520430df388a2b931b86b471 to your computer and use it in GitHub Desktop.
GitHub merged PR stats per developer — categories, repos, time-to-merge, reviewer turnaround (avg/median/p90). Requires gh CLI.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Pull merged PR stats for a team of GitHub users. | |
| Uses the `gh` CLI to query GitHub's GraphQL API for merged PRs, | |
| then computes useful stats like count, avg/median/p90 time-to-merge, | |
| reviewer turnaround, and categorizes PRs by conventional commit type. | |
| Requires: gh CLI (https://cli.github.com/) authenticated via `gh auth login` | |
| Usage: | |
| python3 pr_merged_stats.py --orgs MyOrg --users alice bob charlie | |
| python3 pr_merged_stats.py --orgs OrgA OrgB --users alice bob --days 30 | |
| python3 pr_merged_stats.py --orgs MyOrg --users alice --exclude-pattern "bot|dependabot" | |
| """ | |
| import argparse | |
| import json | |
| import re | |
| import subprocess | |
| import sys | |
| from collections import defaultdict | |
| from datetime import datetime, timedelta, timezone | |
| from statistics import mean, median, quantiles | |
# Look-back window (days) used when --days is not supplied.
DEFAULT_DAYS = 90

# Conventional-commit prefix -> display category. Every known type maps to
# itself except "feat", which is displayed as "feature".
TYPE_MAP = {
    prefix: prefix
    for prefix in (
        "fix", "chore", "docs", "refactor", "test",
        "ci", "build", "perf", "style", "revert",
    )
}
TYPE_MAP["feat"] = "feature"

# Conventional-commit title prefix: type word, optional "(scope)", optional
# breaking-change "!", then a colon — e.g. "feat(api)!: ...".
CC_PATTERN = re.compile(r"^(\w+)(?:\(.*?\))?[!]?\s*:")
def categorize_title(title: str) -> str:
    """Map a PR title to a display category.

    A conventional-commit prefix (e.g. "feat(scope): ...") wins and is
    translated through TYPE_MAP (unknown prefixes pass through as-is).
    Otherwise a couple of keyword heuristics apply; anything else is "other".
    """
    match = CC_PATTERN.match(title.strip())
    if match is not None:
        prefix = match.group(1).lower()
        return TYPE_MAP.get(prefix, prefix)
    lowered = title.lower()
    if lowered.startswith("fix") or "bugfix" in lowered:
        return "fix"
    return "revert" if lowered.startswith("revert") else "other"
def gh_graphql(query: str) -> dict:
    """Run a GraphQL query through the `gh` CLI and return the decoded JSON.

    On a non-zero exit from `gh`, echoes its stderr and terminates the
    process with status 1.
    """
    proc = subprocess.run(
        ["gh", "api", "graphql", "-f", f"query={query}"],
        capture_output=True,
        text=True,
    )
    if proc.returncode == 0:
        return json.loads(proc.stdout)
    print(f"Error: {proc.stderr}", file=sys.stderr)
    sys.exit(1)
def fetch_merged_prs(username: str, orgs: list[str], since: str) -> list[dict]:
    """Fetch PRs authored and merged by a user across multiple orgs.

    Runs one paginated GraphQL search per org through the gh CLI (50 results
    per page) and returns the raw PullRequest nodes.

    Args:
        username: GitHub login used in the `author:` search qualifier.
        orgs: organization logins; each gets its own search loop.
        since: date string (YYYY-MM-DD) for the `merged:>` qualifier.

    Returns:
        List of PullRequest node dicts: title, repo, created/merged
        timestamps, diff size, review-request timeline events, and reviews.

    NOTE(review): GitHub's search API caps total results per query
    (historically 1000) — confirm that is acceptable for very active users.
    """
    prs: list[dict] = []
    for org in orgs:
        cursor = None  # pagination cursor; None means first page
        while True:
            # Splice the cursor into the search call only after page one.
            after = f', after: "{cursor}"' if cursor else ""
            query = f"""
            {{
                search(
                    query: "org:{org} is:pr is:merged author:{username} merged:>{since}"
                    type: ISSUE
                    first: 50
                    {after}
                ) {{
                    pageInfo {{ hasNextPage endCursor }}
                    nodes {{
                        ... on PullRequest {{
                            number
                            title
                            repository {{ nameWithOwner }}
                            createdAt
                            mergedAt
                            additions
                            deletions
                            changedFiles
                            timelineItems(itemTypes: [REVIEW_REQUESTED_EVENT], first: 20) {{
                                nodes {{
                                    ... on ReviewRequestedEvent {{
                                        createdAt
                                        requestedReviewer {{
                                            ... on User {{ login }}
                                        }}
                                    }}
                                }}
                            }}
                            reviews(first: 50) {{
                                nodes {{
                                    author {{ login }}
                                    submittedAt
                                    state
                                }}
                            }}
                        }}
                    }}
                }}
            }}
            """
            data = gh_graphql(query)
            search = data["data"]["search"]
            for node in search["nodes"]:
                # Search connections may contain null nodes; keep only real ones.
                if node:
                    prs.append(node)
            if search["pageInfo"]["hasNextPage"]:
                cursor = search["pageInfo"]["endCursor"]
            else:
                break
    return prs
def parse_dt(s: str) -> datetime:
    """Parse an ISO-8601 timestamp, rewriting a 'Z' suffix to an explicit UTC offset."""
    normalized = s.replace("Z", "+00:00")
    return datetime.fromisoformat(normalized)
def p90(values: list[float]) -> float:
    """Return the 90th percentile (top decile cut) of *values*.

    Degenerate inputs short-circuit: an empty list yields 0 and a single
    value is returned unchanged, since quantiles() needs at least two points.
    """
    if not values:
        return 0
    if len(values) == 1:
        return values[0]
    # quantiles(n=10) returns the nine decile cut points; the last is p90.
    return quantiles(values, n=10)[-1]
def compute_stats(prs: list[dict], team_logins: set[str]) -> dict:
    """Aggregate stats over one user's merged PRs.

    Args:
        prs: PullRequest node dicts as returned by fetch_merged_prs().
        team_logins: lowercase team usernames; accepted for interface
            compatibility but unused here — team filtering of reviewers
            happens at the call site.

    Returns:
        dict with total count, per-repo and per-category counts, per-PR
        time-to-merge hours, diff-size totals, and per-reviewer turnaround
        hours (first review after the latest review request).
    """
    merge_hours = []
    repos = defaultdict(int)
    categories = defaultdict(int)
    total_additions = 0
    total_deletions = 0
    total_files = 0
    reviewer_hours = defaultdict(list)
    for pr in prs:
        created = parse_dt(pr["createdAt"])
        merged = parse_dt(pr["mergedAt"])
        hours = (merged - created).total_seconds() / 3600
        merge_hours.append(hours)
        repos[pr["repository"]["nameWithOwner"]] += 1
        categories[categorize_title(pr["title"])] += 1
        total_additions += pr.get("additions", 0)
        total_deletions += pr.get("deletions", 0)
        total_files += pr.get("changedFiles", 0)
        # Build review-request times by reviewer. A later re-request
        # overwrites an earlier one, so turnaround is measured from the most
        # recent request. `or {}` / `or []` guards: GraphQL may return null
        # for a connection or for individual nodes.
        request_times = {}
        for event in (pr.get("timelineItems") or {}).get("nodes") or []:
            if not event:  # null node in the connection
                continue
            reviewer = event.get("requestedReviewer") or {}
            if reviewer.get("login"):
                request_times[reviewer["login"].lower()] = parse_dt(event["createdAt"])
        # Group reviews by reviewer login (lowercased for stable matching).
        reviews_by_reviewer = defaultdict(list)
        for review in (pr.get("reviews") or {}).get("nodes") or []:
            if not review:
                continue
            # author is null for deleted ("ghost") accounts — guard before .get
            author = (review.get("author") or {}).get("login", "")
            if author and review.get("submittedAt"):
                reviews_by_reviewer[author.lower()].append(review)
        # Per-reviewer turnaround: first review after the request baseline.
        for reviewer_login, reviews in reviews_by_reviewer.items():
            # ISO-8601 UTC timestamps sort lexicographically, so string min works.
            first_review = min(reviews, key=lambda r: r["submittedAt"])
            review_at = parse_dt(first_review["submittedAt"])
            # No recorded request -> measure from PR creation instead.
            baseline = request_times.get(reviewer_login, created)
            turnaround = (review_at - baseline).total_seconds() / 3600
            if turnaround >= 0:  # negative means the review predates the (re-)request
                reviewer_hours[reviewer_login].append(turnaround)
    return {
        "total_merged": len(prs),
        "repos": dict(repos),
        "categories": dict(categories),
        "merge_hours": merge_hours,
        "total_additions": total_additions,
        "total_deletions": total_deletions,
        "total_files_changed": total_files,
        "reviewer_hours": dict(reviewer_hours),
    }
def fmt_hours(h: float) -> str:
    """Render a duration given in hours as a compact minutes/hours/days string."""
    if h < 1:
        return f"{h * 60:.0f}m"
    return f"{h:.1f}h" if h < 24 else f"{h / 24:.1f}d"
def main():
    """CLI entry point.

    Parses arguments, fetches merged PRs for each user via the gh CLI,
    prints a per-user breakdown (time-to-merge, categories, repos, reviewer
    turnaround) and, when more than one user is given, team comparison
    tables.
    """
    parser = argparse.ArgumentParser(
        description="Merged PR stats for a team of GitHub users.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples:
  %(prog)s --orgs myorg --users alice bob charlie
  %(prog)s --orgs orgA orgB --users alice bob --days 30
  %(prog)s --orgs myorg --users alice --exclude-pattern "dependabot|renovate"
""",
    )
    parser.add_argument(
        "--users", nargs="+", required=True,
        help="GitHub usernames to report on",
    )
    parser.add_argument(
        "--orgs", nargs="+", required=True,
        help="GitHub org(s) to search",
    )
    parser.add_argument(
        "--days", type=int, default=DEFAULT_DAYS,
        help=f"Lookback window in days (default: {DEFAULT_DAYS})",
    )
    parser.add_argument(
        "--exclude-pattern",
        help="Regex pattern to exclude PRs by title (e.g. 'dependabot|renovate')",
    )
    args = parser.parse_args()
    # Optional case-insensitive title filter, e.g. to drop bot-authored PRs.
    exclude_re = re.compile(args.exclude_pattern, re.IGNORECASE) if args.exclude_pattern else None
    # Lowercased so reviewer logins (also lowercased downstream) compare reliably.
    team_logins = {u.lower() for u in args.users}
    # Date-only lower bound for the GraphQL `merged:>` search qualifier.
    since = (datetime.now(timezone.utc) - timedelta(days=args.days)).strftime("%Y-%m-%d")
    orgs_label = ", ".join(args.orgs)
    print(f"Merged PR Stats — {orgs_label} — last {args.days} days (since {since})")
    print(f"Users: {', '.join(f'@{u}' for u in args.users)}")
    if exclude_re:
        print(f"Excluding PRs matching: {args.exclude_pattern}")
    # Fetch all data
    all_stats = {}
    for username in args.users:
        print(f"\nFetching data for @{username}...", end=" ", flush=True)
        raw_prs = fetch_merged_prs(username, args.orgs, since)
        if exclude_re:
            excluded = [pr for pr in raw_prs if exclude_re.search(pr["title"])]
            prs = [pr for pr in raw_prs if not exclude_re.search(pr["title"])]
            print(f"found {len(raw_prs)} merged PRs, excluded {len(excluded)} by pattern")
        else:
            prs = raw_prs
            print(f"found {len(prs)} merged PRs")
        all_stats[username] = compute_stats(prs, team_logins)
    # --- Per-User Breakdown ---
    for username in args.users:
        s = all_stats[username]
        hrs = s["merge_hours"]  # per-PR time-to-merge, in hours
        print(f"\n{'=' * 70}")
        print(f" @{username}")
        print(f"{'=' * 70}")
        print(f" PRs merged: {s['total_merged']} "
              f"+{s['total_additions']} / -{s['total_deletions']} lines "
              f"{s['total_files_changed']} files changed")
        if hrs:
            print(f"\n Time to Merge")
            print(f" {'Avg':<8} {'Median':<8} {'p90':<8} {'Fastest':<8} {'Slowest':<8}")
            print(f" {'-'*8} {'-'*8} {'-'*8} {'-'*8} {'-'*8}")
            print(f" {fmt_hours(mean(hrs)):<8} {fmt_hours(median(hrs)):<8} "
                  f"{fmt_hours(p90(hrs)):<8} {fmt_hours(min(hrs)):<8} {fmt_hours(max(hrs)):<8}")
        # Category breakdown
        cats = s["categories"]
        if cats:
            sorted_cats = sorted(cats.items(), key=lambda x: -x[1])  # most frequent first
            print(f"\n {'Category':<12} {'Count':>6} {'%':>6}")
            print(f" {'-'*12} {'-'*6} {'-'*6}")
            for cat, count in sorted_cats:
                pct = count / s["total_merged"] * 100
                print(f" {cat:<12} {count:>6} {pct:>5.0f}%")
        # Repo breakdown
        repos = s["repos"]
        if repos:
            sorted_repos = sorted(repos.items(), key=lambda x: -x[1])
            print(f"\n {'Repo':<40} {'Count':>6} {'%':>6}")
            print(f" {'-'*40} {'-'*6} {'-'*6}")
            for repo, count in sorted_repos:
                short = repo.split("/")[-1]  # drop the "owner/" prefix for display
                pct = count / s["total_merged"] * 100
                print(f" {short:<40} {count:>6} {pct:>5.0f}%")
        # Reviewer stats
        rev_hours = s["reviewer_hours"]
        # Teammates always shown; outside reviewers only with 3+ reviews to cut noise.
        notable_reviewers = {
            login: hours for login, hours in rev_hours.items()
            if login in team_logins or len(hours) >= 3
        }
        if notable_reviewers:
            print(f"\n Reviewers of @{username}'s PRs")
            print(f" {'Reviewer':<20} {'Reviews':>7} {'Avg':>8} {'Median':>8} {'p90':>8}")
            print(f" {'-'*20} {'-'*7} {'-'*8} {'-'*8} {'-'*8}")
            for login, hours in sorted(notable_reviewers.items(), key=lambda x: -len(x[1])):
                print(f" {login:<20} {len(hours):>7} {fmt_hours(mean(hours)):>8} "
                      f"{fmt_hours(median(hours)):>8} {fmt_hours(p90(hours)):>8}")
    # --- Team Comparison --- (only meaningful with more than one user)
    if len(args.users) > 1:
        print(f"\n{'=' * 70}")
        print(f" TEAM COMPARISON — MERGED PRs")
        print(f"{'=' * 70}")
        print(f" {'User':<20} {'Merged':>6} {'Added':>8} {'Deleted':>8} {'Files':>6}"
              f" {'Avg TTM':>8} {'Med TTM':>8} {'p90 TTM':>8}")
        print(f" {'-'*20} {'-'*6} {'-'*8} {'-'*8} {'-'*6} {'-'*8} {'-'*8} {'-'*8}")
        for username in args.users:
            s = all_stats[username]
            hrs = s["merge_hours"]
            avg = fmt_hours(mean(hrs)) if hrs else "n/a"
            med = fmt_hours(median(hrs)) if hrs else "n/a"
            p = fmt_hours(p90(hrs)) if hrs else "n/a"
            print(
                f" {username:<20} {s['total_merged']:>6}"
                f" {'+' + str(s['total_additions']):>8}"
                f" {'-' + str(s['total_deletions']):>8}"
                f" {s['total_files_changed']:>6}"
                f" {avg:>8} {med:>8} {p:>8}"
            )
        # Review turnaround summary
        # Pool turnaround samples across every author's PRs, then keep teammates only.
        all_reviewer_hours = defaultdict(list)
        for s in all_stats.values():
            for login, hours in s["reviewer_hours"].items():
                all_reviewer_hours[login.lower()].extend(hours)
        team_reviewers = {
            login: hours for login, hours in all_reviewer_hours.items()
            if login in team_logins
        }
        if team_reviewers:
            print(f"\n{'=' * 70}")
            print(f" TEAM COMPARISON — REVIEW TURNAROUND (as reviewer)")
            print(f"{'=' * 70}")
            print(f" {'Reviewer':<20} {'Reviews':>7} {'Avg':>8} {'Median':>8} {'p90':>8}")
            print(f" {'-'*20} {'-'*7} {'-'*8} {'-'*8} {'-'*8}")
            for login in sorted(team_reviewers, key=lambda l: -len(team_reviewers[l])):
                hours = team_reviewers[login]
                print(f" {login:<20} {len(hours):>7}"
                      f" {fmt_hours(mean(hours)):>8}"
                      f" {fmt_hours(median(hours)):>8}"
                      f" {fmt_hours(p90(hours)):>8}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment