Skip to content

Instantly share code, notes, and snippets.

@ronco
Created March 26, 2026 19:52
Show Gist options
  • Select an option

  • Save ronco/195aabfa520430df388a2b931b86b471 to your computer and use it in GitHub Desktop.

Select an option

Save ronco/195aabfa520430df388a2b931b86b471 to your computer and use it in GitHub Desktop.
GitHub merged PR stats per developer — categories, repos, time-to-merge, reviewer turnaround (avg/median/p90). Requires gh CLI.
#!/usr/bin/env python3
"""
Pull merged PR stats for a team of GitHub users.
Uses the `gh` CLI to query GitHub's GraphQL API for merged PRs,
then computes useful stats like count, avg/median/p90 time-to-merge,
reviewer turnaround, and categorizes PRs by conventional commit type.
Requires: gh CLI (https://cli.github.com/) authenticated via `gh auth login`
Usage:
python3 pr_merged_stats.py --orgs MyOrg --users alice bob charlie
python3 pr_merged_stats.py --orgs OrgA OrgB --users alice bob --days 30
python3 pr_merged_stats.py --orgs MyOrg --users alice --exclude-pattern "bot|dependabot"
"""
import argparse
import json
import re
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from statistics import mean, median, quantiles
DEFAULT_DAYS = 90

# Conventional commit prefix -> display category
TYPE_MAP = {
    "feat": "feature",
    "fix": "fix",
    "chore": "chore",
    "docs": "docs",
    "refactor": "refactor",
    "test": "test",
    "ci": "ci",
    "build": "build",
    "perf": "perf",
    "style": "style",
    "revert": "revert",
}

# Matches a conventional-commit prefix: "type", optional "(scope)",
# optional "!", then a colon — e.g. "feat(api)!: ...".
CC_PATTERN = re.compile(r"^(\w+)(?:\(.*?\))?[!]?\s*:")


def categorize_title(title: str) -> str:
    """Categorize a PR title based on conventional commit prefix.

    Known prefixes map through TYPE_MAP; unknown prefixes are returned
    verbatim (lowercased). Titles without a prefix fall back to simple
    keyword heuristics, defaulting to "other".
    """
    if match := CC_PATTERN.match(title.strip()):
        kind = match.group(1).lower()
        return TYPE_MAP.get(kind, kind)
    normalized = title.lower()
    if normalized.startswith("fix") or "bugfix" in normalized:
        return "fix"
    if normalized.startswith("revert"):
        return "revert"
    return "other"
def gh_graphql(query: str) -> dict:
    """Execute a GraphQL query via the `gh` CLI and return the parsed JSON.

    Prints gh's stderr and exits the process with status 1 on any CLI
    failure (not authenticated, network error, bad query, ...).
    """
    proc = subprocess.run(
        ["gh", "api", "graphql", "-f", f"query={query}"],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        print(f"Error: {proc.stderr}", file=sys.stderr)
        sys.exit(1)
    return json.loads(proc.stdout)
def fetch_merged_prs(username: str, orgs: list[str], since: str) -> list[dict]:
    """Fetch PRs authored and merged by a user across multiple orgs.

    Pages through GitHub's GraphQL search (50 results per page) for each
    org in turn and collects the raw PullRequest nodes.

    Args:
        username: GitHub login of the PR author.
        orgs: organization names to search, one paginated query loop each.
        since: YYYY-MM-DD lower bound used in the `merged:>` qualifier.

    Returns:
        Raw PullRequest node dicts from all orgs and pages, unprocessed.
    """
    prs = []
    for org in orgs:
        cursor = None
        while True:
            # On follow-up pages, resume from the previous page's endCursor.
            # NOTE(review): org/username/since are interpolated into the
            # query unescaped — fine for trusted CLI args, but quotes or
            # braces in them would corrupt the query string.
            after = f', after: "{cursor}"' if cursor else ""
            query = f"""
{{
  search(
    query: "org:{org} is:pr is:merged author:{username} merged:>{since}"
    type: ISSUE
    first: 50
    {after}
  ) {{
    pageInfo {{ hasNextPage endCursor }}
    nodes {{
      ... on PullRequest {{
        number
        title
        repository {{ nameWithOwner }}
        createdAt
        mergedAt
        additions
        deletions
        changedFiles
        timelineItems(itemTypes: [REVIEW_REQUESTED_EVENT], first: 20) {{
          nodes {{
            ... on ReviewRequestedEvent {{
              createdAt
              requestedReviewer {{
                ... on User {{ login }}
              }}
            }}
          }}
        }}
        reviews(first: 50) {{
          nodes {{
            author {{ login }}
            submittedAt
            state
          }}
        }}
      }}
    }}
  }}
}}
"""
            data = gh_graphql(query)
            search = data["data"]["search"]
            for node in search["nodes"]:
                # Search can return null entries; keep only real PR nodes.
                if node:
                    prs.append(node)
            if search["pageInfo"]["hasNextPage"]:
                cursor = search["pageInfo"]["endCursor"]
            else:
                break
    return prs
def parse_dt(s: str) -> datetime:
    """Parse a GitHub ISO-8601 timestamp (e.g. '2024-03-01T12:30:45Z')
    into a timezone-aware datetime."""
    # fromisoformat on older Pythons does not accept the 'Z' suffix,
    # so rewrite it as an explicit UTC offset first.
    if s.endswith("Z"):
        s = s[:-1] + "+00:00"
    return datetime.fromisoformat(s)
def p90(values: list[float]) -> float:
    """Return the 90th percentile of *values* (0.0 for an empty list).

    Uses quantiles(..., method="inclusive") so the result is interpolated
    within the observed data range. The default "exclusive" method
    extrapolates beyond max(values) for small samples — e.g. it would
    report p90([1, 2]) == 2.7, a "p90 merge time" slower than the slowest
    PR, which is nonsense for a report like this.
    """
    if not values:
        return 0.0
    if len(values) == 1:
        # quantiles() requires at least two data points.
        return values[0]
    return quantiles(values, n=10, method="inclusive")[-1]
def compute_stats(prs: list[dict], team_logins: set[str]) -> dict:
    """Aggregate merged-PR stats from raw GraphQL PullRequest nodes.

    Args:
        prs: PullRequest nodes as returned by fetch_merged_prs.
        team_logins: lowercased team member logins.
            NOTE(review): currently unused here — reviewer filtering happens
            in main(); confirm whether it was meant to filter here too.

    Returns:
        Dict with total count, per-repo and per-category counts, line/file
        totals, per-PR time-to-merge samples (hours), and per-reviewer
        turnaround samples (hours).

    Fix: review nodes can carry ``author: null`` (deleted users, some apps);
    the previous ``review.get("author", {})`` returned that None and then
    crashed on ``.get("login")``. Guard with ``or {}`` instead.
    """
    merge_hours = []                    # hours from PR creation to merge
    repos = defaultdict(int)            # nameWithOwner -> merged PR count
    categories = defaultdict(int)       # conventional-commit category -> count
    total_additions = 0
    total_deletions = 0
    total_files = 0
    reviewer_hours = defaultdict(list)  # reviewer login -> turnaround hours
    for pr in prs:
        created = parse_dt(pr["createdAt"])
        merged = parse_dt(pr["mergedAt"])
        hours = (merged - created).total_seconds() / 3600
        merge_hours.append(hours)
        repos[pr["repository"]["nameWithOwner"]] += 1
        categories[categorize_title(pr["title"])] += 1
        total_additions += pr.get("additions", 0)
        total_deletions += pr.get("deletions", 0)
        total_files += pr.get("changedFiles", 0)
        # Build review request times by reviewer (last request wins if a
        # reviewer was requested more than once).
        request_times = {}
        for event in pr.get("timelineItems", {}).get("nodes", []) or []:
            # requestedReviewer may be null or a non-User (e.g. a Team),
            # in which case it carries no "login".
            reviewer = event.get("requestedReviewer") or {}
            if reviewer and reviewer.get("login"):
                request_times[reviewer["login"].lower()] = parse_dt(event["createdAt"])
        # Group reviews by reviewer; skip reviews with no author (deleted
        # user -> author is null) or no submission timestamp.
        reviews_by_reviewer = defaultdict(list)
        for review in pr.get("reviews", {}).get("nodes", []) or []:
            author = (review.get("author") or {}).get("login", "")
            if author and review.get("submittedAt"):
                reviews_by_reviewer[author.lower()].append(review)
        # Turnaround = first review relative to the review request (or PR
        # creation when the reviewer was never explicitly requested).
        for reviewer_login, reviews in reviews_by_reviewer.items():
            # ISO-8601 timestamps sort lexicographically, so min() on the
            # raw string finds the earliest review.
            first_review = min(reviews, key=lambda r: r["submittedAt"])
            review_at = parse_dt(first_review["submittedAt"])
            baseline = request_times.get(reviewer_login, created)
            turnaround = (review_at - baseline).total_seconds() / 3600
            if turnaround >= 0:
                # Negative turnaround = review predates the (re-)request; skip.
                reviewer_hours[reviewer_login].append(turnaround)
    return {
        "total_merged": len(prs),
        "repos": dict(repos),
        "categories": dict(categories),
        "merge_hours": merge_hours,
        "total_additions": total_additions,
        "total_deletions": total_deletions,
        "total_files_changed": total_files,
        "reviewer_hours": dict(reviewer_hours),
    }
def fmt_hours(h: float) -> str:
    """Render a duration given in hours as a compact human string:
    minutes below 1h, hours below 1d, otherwise days."""
    if h >= 24:
        return f"{h / 24:.1f}d"
    if h >= 1:
        return f"{h:.1f}h"
    return f"{h * 60:.0f}m"
def main():
    """CLI entry point: fetch merged-PR data per user and print the report.

    Parses arguments, pulls merged PRs for each user via the GitHub GraphQL
    API (through `gh`), then prints a per-user breakdown and — when more
    than one user is given — team comparison tables.
    """
    parser = argparse.ArgumentParser(
        description="Merged PR stats for a team of GitHub users.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""examples:
%(prog)s --orgs myorg --users alice bob charlie
%(prog)s --orgs orgA orgB --users alice bob --days 30
%(prog)s --orgs myorg --users alice --exclude-pattern "dependabot|renovate"
""",
    )
    parser.add_argument(
        "--users", nargs="+", required=True,
        help="GitHub usernames to report on",
    )
    parser.add_argument(
        "--orgs", nargs="+", required=True,
        help="GitHub org(s) to search",
    )
    parser.add_argument(
        "--days", type=int, default=DEFAULT_DAYS,
        help=f"Lookback window in days (default: {DEFAULT_DAYS})",
    )
    parser.add_argument(
        "--exclude-pattern",
        help="Regex pattern to exclude PRs by title (e.g. 'dependabot|renovate')",
    )
    args = parser.parse_args()
    # Optional case-insensitive regex used to drop bot/noise PRs by title.
    exclude_re = re.compile(args.exclude_pattern, re.IGNORECASE) if args.exclude_pattern else None
    # Lowercased team logins, used below to highlight in-team reviewers.
    team_logins = {u.lower() for u in args.users}
    # Lower bound (YYYY-MM-DD) for the GitHub `merged:>` search qualifier.
    since = (datetime.now(timezone.utc) - timedelta(days=args.days)).strftime("%Y-%m-%d")
    orgs_label = ", ".join(args.orgs)
    print(f"Merged PR Stats — {orgs_label} — last {args.days} days (since {since})")
    print(f"Users: {', '.join(f'@{u}' for u in args.users)}")
    if exclude_re:
        print(f"Excluding PRs matching: {args.exclude_pattern}")
    # Fetch all data
    all_stats = {}
    for username in args.users:
        print(f"\nFetching data for @{username}...", end=" ", flush=True)
        raw_prs = fetch_merged_prs(username, args.orgs, since)
        if exclude_re:
            excluded = [pr for pr in raw_prs if exclude_re.search(pr["title"])]
            prs = [pr for pr in raw_prs if not exclude_re.search(pr["title"])]
            print(f"found {len(raw_prs)} merged PRs, excluded {len(excluded)} by pattern")
        else:
            prs = raw_prs
            print(f"found {len(prs)} merged PRs")
        all_stats[username] = compute_stats(prs, team_logins)
    # --- Per-User Breakdown ---
    for username in args.users:
        s = all_stats[username]
        hrs = s["merge_hours"]  # per-PR time-to-merge samples, in hours
        print(f"\n{'=' * 70}")
        print(f" @{username}")
        print(f"{'=' * 70}")
        print(f" PRs merged: {s['total_merged']} "
              f"+{s['total_additions']} / -{s['total_deletions']} lines "
              f"{s['total_files_changed']} files changed")
        if hrs:
            print(f"\n Time to Merge")
            print(f" {'Avg':<8} {'Median':<8} {'p90':<8} {'Fastest':<8} {'Slowest':<8}")
            print(f" {'-'*8} {'-'*8} {'-'*8} {'-'*8} {'-'*8}")
            print(f" {fmt_hours(mean(hrs)):<8} {fmt_hours(median(hrs)):<8} "
                  f"{fmt_hours(p90(hrs)):<8} {fmt_hours(min(hrs)):<8} {fmt_hours(max(hrs)):<8}")
        # Category breakdown
        cats = s["categories"]
        if cats:
            sorted_cats = sorted(cats.items(), key=lambda x: -x[1])
            print(f"\n {'Category':<12} {'Count':>6} {'%':>6}")
            print(f" {'-'*12} {'-'*6} {'-'*6}")
            for cat, count in sorted_cats:
                pct = count / s["total_merged"] * 100
                print(f" {cat:<12} {count:>6} {pct:>5.0f}%")
        # Repo breakdown
        repos = s["repos"]
        if repos:
            sorted_repos = sorted(repos.items(), key=lambda x: -x[1])
            print(f"\n {'Repo':<40} {'Count':>6} {'%':>6}")
            print(f" {'-'*40} {'-'*6} {'-'*6}")
            for repo, count in sorted_repos:
                short = repo.split("/")[-1]  # drop the org prefix for display
                pct = count / s["total_merged"] * 100
                print(f" {short:<40} {count:>6} {pct:>5.0f}%")
        # Reviewer stats
        rev_hours = s["reviewer_hours"]
        # Show only reviewers who are on the team or reviewed >= 3 PRs.
        notable_reviewers = {
            login: hours for login, hours in rev_hours.items()
            if login in team_logins or len(hours) >= 3
        }
        if notable_reviewers:
            print(f"\n Reviewers of @{username}'s PRs")
            print(f" {'Reviewer':<20} {'Reviews':>7} {'Avg':>8} {'Median':>8} {'p90':>8}")
            print(f" {'-'*20} {'-'*7} {'-'*8} {'-'*8} {'-'*8}")
            for login, hours in sorted(notable_reviewers.items(), key=lambda x: -len(x[1])):
                print(f" {login:<20} {len(hours):>7} {fmt_hours(mean(hours)):>8} "
                      f"{fmt_hours(median(hours)):>8} {fmt_hours(p90(hours)):>8}")
    # --- Team Comparison ---
    if len(args.users) > 1:
        print(f"\n{'=' * 70}")
        print(f" TEAM COMPARISON — MERGED PRs")
        print(f"{'=' * 70}")
        print(f" {'User':<20} {'Merged':>6} {'Added':>8} {'Deleted':>8} {'Files':>6}"
              f" {'Avg TTM':>8} {'Med TTM':>8} {'p90 TTM':>8}")
        print(f" {'-'*20} {'-'*6} {'-'*8} {'-'*8} {'-'*6} {'-'*8} {'-'*8} {'-'*8}")
        for username in args.users:
            s = all_stats[username]
            hrs = s["merge_hours"]
            avg = fmt_hours(mean(hrs)) if hrs else "n/a"
            med = fmt_hours(median(hrs)) if hrs else "n/a"
            p = fmt_hours(p90(hrs)) if hrs else "n/a"
            print(
                f" {username:<20} {s['total_merged']:>6}"
                f" {'+' + str(s['total_additions']):>8}"
                f" {'-' + str(s['total_deletions']):>8}"
                f" {s['total_files_changed']:>6}"
                f" {avg:>8} {med:>8} {p:>8}"
            )
        # Review turnaround summary
        # Pool turnaround samples across all users' PRs, keyed by reviewer.
        # NOTE(review): placed inside the multi-user branch to match the
        # "TEAM COMPARISON" heading — confirm against the original layout.
        all_reviewer_hours = defaultdict(list)
        for s in all_stats.values():
            for login, hours in s["reviewer_hours"].items():
                all_reviewer_hours[login.lower()].extend(hours)
        team_reviewers = {
            login: hours for login, hours in all_reviewer_hours.items()
            if login in team_logins
        }
        if team_reviewers:
            print(f"\n{'=' * 70}")
            print(f" TEAM COMPARISON — REVIEW TURNAROUND (as reviewer)")
            print(f"{'=' * 70}")
            print(f" {'Reviewer':<20} {'Reviews':>7} {'Avg':>8} {'Median':>8} {'p90':>8}")
            print(f" {'-'*20} {'-'*7} {'-'*8} {'-'*8} {'-'*8}")
            for login in sorted(team_reviewers, key=lambda l: -len(team_reviewers[l])):
                hours = team_reviewers[login]
                print(f" {login:<20} {len(hours):>7}"
                      f" {fmt_hours(mean(hours)):>8}"
                      f" {fmt_hours(median(hours)):>8}"
                      f" {fmt_hours(p90(hours)):>8}")
# Script entry point: run the report only when executed directly, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment