Skip to content

Instantly share code, notes, and snippets.

@ggorlen
Created May 16, 2026 14:07
Show Gist options
  • Select an option

  • Save ggorlen/88aa800318495d3536e777207dc8353f to your computer and use it in GitHub Desktop.

Select an option

Save ggorlen/88aa800318495d3536e777207dc8353f to your computer and use it in GitHub Desktop.
better git stats
#!/usr/bin/env python3
# Git stats are misleading because some people squash merge and some people
# create merge commits, bringing their history of commits from the branch onto
# master and making it look like they did way more work relative to squashers.
#
# This tries to treat merge commits as if they were squash merged so repo stats
# are actually normalized/comparable. It also merges author identities that
# share the same name or the same email address.
#
# I'm sure this isn't 100% accurate, particularly for LOC, but anecdotally seems
# to give reasonable results.
#
# See also:
# - https://chatgpt.com/c/6a086d4c-da14-83ea-b0e8-5a887e199692
# - https://stackoverflow.com/questions/49002013
import subprocess
import sys
import json
from collections import defaultdict
def git(*args):
    """Run ``git`` with the given arguments and return its stdout as text.

    The output is decoded as UTF-8. A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    command = ["git"]
    command.extend(args)
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="utf-8",
    )
    return completed.stdout
def find(parent, x):
    """Return the root of the set containing ``x`` in the map ``parent``.

    ``parent`` maps each key to its parent key; a root maps to itself. A key
    that is not in the map yet is registered as its own root. While walking
    up, each visited key is re-pointed at its grandparent, which shortens the
    chain for later lookups.
    """
    parent.setdefault(x, x)
    node = x
    while True:
        above = parent[node]
        if above == node:
            return node
        # Skip one level: hang this node off its grandparent and go there.
        above = parent[above]
        parent[node] = above
        node = above
def union(parent, a, b):
    """Merge the sets containing ``a`` and ``b`` in the map ``parent``.

    The root of ``b``'s set is attached beneath the root of ``a``'s set.
    Nothing changes when both keys already share a root.
    """
    root_a, root_b = find(parent, a), find(parent, b)
    if root_a == root_b:
        return
    parent[root_b] = root_a
def parse_commits():
    """Read the first-parent history and group author identities.

    Returns:
        A ``(commits, parent)`` tuple. ``commits`` is a list of
        ``(commit_hash, name, email, parents)`` tuples in ``git log`` order,
        where ``parents`` is a list of parent hashes (empty for a root
        commit). ``parent`` is a union-find map in which every commit's
        ``name:<name>`` and ``email:<email>`` keys have been unioned, so
        identities that share a name or an email land in the same set.
    """
    # Fields are separated by a NUL byte (git expands %x00) instead of "|":
    # a "|" may legitimately occur in an author name and would break the
    # four-way unpacking below.
    separator = "\0"
    log = git(
        "log",
        "--first-parent",
        "--format=%H%x00%aN%x00%aE%x00%P",
    )
    commits = []
    parent = {}
    for line in log.splitlines():
        commit_hash, name, email, parents = line.split(separator)
        commits.append((commit_hash, name, email, parents.split()))
        union(parent, f"name:{name}", f"email:{email}")
    return commits, parent
def parse_loc():
    """Return line-change totals for every commit on the first-parent chain.

    Returns:
        A dict mapping each commit hash to an ``(added, deleted)`` tuple
        summed over the commit's ``--numstat`` rows. Rows whose counts are
        ``-`` (git's marker for binary files) contribute nothing. A commit
        with no rows maps to ``(0, 0)``.
    """
    numstat_output = git(
        "log",
        "--first-parent",
        # Older Git releases print no diff for merge commits unless -m is
        # given; combined with --first-parent it shows the diff against the
        # first parent only. Where --first-parent already implies this, the
        # flag changes nothing.
        "-m",
        "--format=COMMIT:%H",
        "--numstat",
    )
    commit_loc = {}
    current = None
    for line in numstat_output.splitlines():
        if line.startswith("COMMIT:"):
            current = line.split(":", 1)[1]
            commit_loc[current] = (0, 0)
            continue
        if current is None:
            continue
        # At most two splits, so an unexpected tab inside the path field
        # cannot break the unpacking.
        parts = line.split("\t", 2)
        if len(parts) < 3:
            continue
        added_field, deleted_field, _path = parts
        added, deleted = commit_loc[current]
        if added_field != "-":
            added += int(added_field)
        if deleted_field != "-":
            deleted += int(deleted_field)
        commit_loc[current] = (added, deleted)
    return commit_loc
def aggregate(commits, uf_parent, commit_loc):
    """Sum commit counts and line totals per author group.

    Args:
        commits: Iterable of ``(commit_hash, name, email, parents)`` tuples,
            as produced by ``parse_commits()`` from a ``--first-parent`` log.
        uf_parent: Union-find map used to resolve ``name:<name>`` to the
            root key of the author's group.
        commit_loc: Mapping of commit hash to ``(added, deleted)``; commits
            missing from it count as ``(0, 0)``.

    Returns:
        A ``(counts, loc_added, loc_deleted, names, emails)`` tuple of
        mappings keyed by group root: number of commits, lines added, lines
        deleted, ``{name: occurrences}`` and the set of emails seen.
    """
    counts = defaultdict(int)
    loc_added = defaultdict(int)
    loc_deleted = defaultdict(int)
    names = defaultdict(lambda: defaultdict(int))
    emails = defaultdict(set)
    # Every listed commit counts exactly once, merge or not. A first-parent
    # log never lists the commits that a merge brought in from its side
    # branch (everything listed after a merge is reachable from the merge's
    # first parent), so there is nothing to look up and hide per merge.
    for commit_hash, name, email, _parents in commits:
        group = find(uf_parent, f"name:{name}")
        names[group][name] += 1
        emails[group].add(email)
        added, deleted = commit_loc.get(commit_hash, (0, 0))
        counts[group] += 1
        loc_added[group] += added
        loc_deleted[group] += deleted
    return counts, loc_added, loc_deleted, names, emails
def format_results(counts, loc_added, loc_deleted, names, emails):
    """Turn the per-group tallies into a list of report rows.

    Rows are ordered by commit count, highest first; groups with equal
    counts keep the order they have in ``counts``. Each row is a dict with
    the keys ``name``, ``emails`` (sorted list), ``commits``, ``loc_added``
    and ``loc_deleted``. The name shown is the spelling with the most
    occurrences in the group; on a tie the first one recorded wins.
    """

    def commit_total(entry):
        return entry[1]

    def most_used(spellings):
        return max(spellings, key=spellings.get)

    ranked = sorted(counts.items(), key=commit_total, reverse=True)
    return [
        {
            "name": most_used(names[group]),
            "emails": sorted(emails[group]),
            "commits": total,
            "loc_added": loc_added[group],
            "loc_deleted": loc_deleted[group],
        }
        for group, total in ranked
    ]
def main():
    """Print per-author commit and line statistics.

    ``git`` is run in the current working directory. With ``--json``
    anywhere on the command line the rows are printed as an indented JSON
    array; otherwise one aligned text line is printed per author group.
    """
    commits, uf_parent = parse_commits()
    tallies = aggregate(commits, uf_parent, parse_loc())
    results = format_results(*tallies)

    if "--json" in sys.argv:
        print(json.dumps(results, indent=2))
        return

    for r in results:
        summary = (
            f"{r['commits']:5} "
            f"+{r['loc_added']:8,d} "
            f"-{r['loc_deleted']:8,d} "
            f"{r['name']} <{', '.join(r['emails'])}>"
        )
        print(summary)


# Produce the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment