Skip to content

Instantly share code, notes, and snippets.

@xylar
Created August 10, 2025 16:22
Show Gist options
  • Select an option

  • Save xylar/756cd25c69e1b31e16ff6466b3eb5967 to your computer and use it in GitHub Desktop.

Select an option

Save xylar/756cd25c69e1b31e16ff6466b3eb5967 to your computer and use it in GitHub Desktop.
Diff a file from a GitHub repo between 2 versions
#!/usr/bin/env python3
import argparse
import difflib
import re
import sys
from typing import Tuple
import requests
def parse_repo(repo_arg: str) -> Tuple[str, str]:
    """
    Split a GitHub repository reference into ``(owner, repo)``.

    Accepted forms (surrounding whitespace, a trailing slash and a trailing
    ``.git`` suffix are ignored):

    * ``owner/repo``
    * ``https://github.com/owner/repo`` (scheme and ``www.`` are optional)
    * ``git@github.com:owner/repo`` or ``ssh://git@github.com/owner/repo``

    Args:
        repo_arg: The repository reference as typed by the user.

    Returns:
        A ``(owner, repo)`` tuple of strings.

    Raises:
        ValueError: If ``repo_arg`` matches none of the accepted forms.
    """
    cleaned = repo_arg.strip().rstrip("/")
    cleaned = re.sub(r"\.git$", "", cleaned)

    patterns = (
        # Web URL, with or without scheme / "www."
        r"^(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/]+)$",
        # SSH remotes: scp-like "git@github.com:o/r" or "ssh://git@github.com/o/r"
        r"^(?:ssh://)?git@github\.com[:/]([^/]+)/([^/]+)$",
        # Bare "owner/repo". ':' and '@' are excluded so that remotes for
        # other hosts (e.g. "git@gitlab.com:o/r") are rejected instead of
        # being silently misread as an owner name.
        r"^([^/:@\s]+)/([^/:@\s]+)$",
    )
    for pattern in patterns:
        m = re.match(pattern, cleaned, re.I)
        if m:
            return m.group(1), m.group(2)

    raise ValueError(
        f"Could not parse repo '{repo_arg}'. Use 'owner/repo' or 'https://github.com/owner/repo'."
    )
def raw_url(owner: str, repo: str, ref: str, path: str) -> str:
    """
    Build the raw.githubusercontent.com URL for a file at a given ref.

    The URL has the shape
    ``https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>``; the ref
    is passed through as given (branch, tag or SHA), without any
    ``refs/tags/`` prefix.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        ref: Branch, tag or commit SHA.
        path: File path inside the repository; leading slashes are dropped.

    Returns:
        The URL as a string, with ``ref`` and ``path`` percent-encoded.
    """
    # Local import so this function does not depend on the module's import
    # block being edited.
    from urllib.parse import quote

    # Ensure no leading slash in the file path.
    relative_path = path.lstrip("/")
    # Percent-encode everything except '/' so characters such as '#', '?' or
    # spaces in a directory/file name are not read as URL fragment/query
    # delimiters (which would silently fetch a different resource).
    return (
        "https://raw.githubusercontent.com/"
        f"{owner}/{repo}/{quote(ref)}/{quote(relative_path)}"
    )
def fetch_text(url: str, token: str | None = None, timeout: int = 30) -> str:
    """
    Download ``url`` with an HTTP GET and return the body as text.

    Args:
        url: The URL to fetch.
        token: Optional bearer token. When given, an ``Authorization`` header
            and a GitHub raw-content ``Accept`` header are sent.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The response body decoded to ``str``.

    Raises:
        FileNotFoundError: If the server answers with HTTP 404.
        requests.HTTPError: For any other error status (via
            ``raise_for_status``).
    """
    # The Accept header only matters if the URL points at api.github.com;
    # the original comment notes raw.githubusercontent.com ignores most API
    # headers.
    request_headers = (
        {
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github.raw",
        }
        if token
        else {}
    )
    response = requests.get(url, headers=request_headers, timeout=timeout)

    if response.status_code != 404:
        response.raise_for_status()
        # Fall back to UTF-8 when the server did not declare an encoding, so
        # that ``.text`` decodes best-effort instead of guessing.
        if not response.encoding:
            response.encoding = "utf-8"
        return response.text

    raise FileNotFoundError(
        f"404 Not Found when fetching:\n{url}\n"
        "• Check the file path\n"
        "• Check the ref (branch/tag/SHA)\n"
        "• Confirm the file existed at that ref"
    )
def colorize_diff(diff_lines, no_color: bool):
    """
    Colorize unified diff output with ANSI escape codes.

    Headers and hunk markers are bold, additions green, deletions red;
    everything else is passed through unchanged.

    Hunk extents are tracked from the ``@@ -a,b +c,d @@`` counts, so that a
    changed line whose own content starts with ``--`` or ``++`` (which shows
    up in the diff as ``---...`` / ``+++...``) is colored as a deletion or
    addition rather than being mistaken for a file header.

    Args:
        diff_lines: Iterable of diff lines.
        no_color: When true, lines are yielded untouched.

    Yields:
        Each input line, optionally wrapped in ANSI color codes.
    """
    if no_color:
        yield from diff_lines
        return

    bold, green, red, reset = "\033[1m", "\033[32m", "\033[31m", "\033[0m"
    # A missing ",count" in a hunk header means a count of 1.
    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")
    old_left = new_left = 0  # lines still expected in the current hunk

    for line in diff_lines:
        header = hunk_header.match(line)
        if header:
            old_count, new_count = header.groups()
            old_left = 1 if old_count is None else int(old_count)
            new_left = 1 if new_count is None else int(new_count)
            yield f"{bold}{line}{reset}"
        elif old_left > 0 or new_left > 0:
            # Inside a hunk the first character alone decides the line type.
            if line.startswith("+"):
                new_left -= 1
                yield f"{green}{line}{reset}"
            elif line.startswith("-"):
                old_left -= 1
                yield f"{red}{line}{reset}"
            else:
                # Context lines count against both sides; a
                # "\ No newline at end of file" marker counts against neither.
                if not line.startswith("\\"):
                    old_left -= 1
                    new_left -= 1
                yield line
        elif line.startswith(("+++", "---", "***", "@@")):
            yield f"{bold}{line}{reset}"
        elif line.startswith("+"):
            yield f"{green}{line}{reset}"
        elif line.startswith("-"):
            yield f"{red}{line}{reset}"
        else:
            yield line
def unified_diff_text(
    old_text: str,
    new_text: str,
    from_label: str,
    to_label: str,
    context: int,
):
    """
    Produce a unified diff between two blobs of text.

    Both inputs are split into lines with ``str.splitlines`` (so line
    terminators are dropped) and handed to ``difflib.unified_diff``.

    Args:
        old_text: Contents of the "before" version.
        new_text: Contents of the "after" version.
        from_label: Label used in the ``---`` header line.
        to_label: Label used in the ``+++`` header line.
        context: Number of context lines around each change.

    Returns:
        The iterator from ``difflib.unified_diff``; its lines carry no
        trailing newline.
    """
    before, after = (blob.splitlines() for blob in (old_text, new_text))
    return difflib.unified_diff(
        before, after, from_label, to_label, n=context, lineterm=""
    )
def main():
    """
    Command-line entry point: diff one file between two refs of a GitHub repo.

    Parses the arguments, downloads the file at the old and new refs, and
    prints a (optionally colorized) unified diff to stdout. If the diff is
    empty, a short explanatory message is printed instead.

    Exits with status 2 if ``--repo`` cannot be parsed and status 1 if either
    download fails; error details go to stderr.
    """
    parser = argparse.ArgumentParser(
        description="Diff a single file between two refs on a GitHub repo (no local checkout)."
    )
    parser.add_argument("--repo", required=True, help="Repo as 'owner/repo' or full https://github.com/owner/repo")
    parser.add_argument("--file", required=True, help="Path to file within the repo (e.g. task-sdk/pyproject.toml)")
    parser.add_argument("--old", required=True, help="Old ref (tag/branch/SHA)")
    parser.add_argument("--new", required=True, help="New ref (tag/branch/SHA)")
    parser.add_argument("--context", type=int, default=3, help="Lines of context for the unified diff (default: 3)")
    parser.add_argument("--no-color", action="store_true", help="Disable ANSI colors")
    parser.add_argument(
        "--token",
        help="Optional GitHub token (needed only for private repos or to avoid rate limits)",
    )
    args = parser.parse_args()

    try:
        owner, repo = parse_repo(args.repo)
    except ValueError as e:
        print(str(e), file=sys.stderr)
        sys.exit(2)

    # Fetch both versions through one code path; stop at the first failure.
    texts = {}
    for label, ref in (("old", args.old), ("new", args.new)):
        url = raw_url(owner, repo, ref, args.file)
        try:
            texts[label] = fetch_text(url, token=args.token)
        except Exception as e:  # top-level boundary: report and exit
            print(f"Failed to fetch {label} file:\n{e}", file=sys.stderr)
            sys.exit(1)
    old_text, new_text = texts["old"], texts["new"]

    from_label = f"{owner}/{repo}@{args.old}:{args.file}"
    to_label = f"{owner}/{repo}@{args.new}:{args.file}"
    diff_iter = unified_diff_text(
        old_text, new_text, from_label, to_label, context=args.context
    )

    printed_any = False
    for line in colorize_diff(diff_iter, no_color=args.no_color):
        printed_any = True
        print(line)
    if printed_any:
        return

    # difflib yields nothing at all (not even the ---/+++ header) when the
    # two line sequences are equal, so an empty diff means "no line-level
    # changes". The raw texts can still differ, because line terminators are
    # dropped before diffing.
    if old_text == new_text:
        print(
            f"No differences found in '{args.file}' between {args.old} and "
            f"{args.new}."
        )
    else:
        print(
            f"No line-level differences found in '{args.file}' between "
            f"{args.old} and {args.new} (the contents differ only in line "
            "endings or a trailing newline)."
        )
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment