Created
August 10, 2025 16:22
-
-
Save xylar/756cd25c69e1b31e16ff6466b3eb5967 to your computer and use it in GitHub Desktop.
Diff a file from a GitHub repo between 2 versions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import argparse | |
| import difflib | |
| import re | |
| import sys | |
| from typing import Tuple | |
| import requests | |
def parse_repo(repo_arg: str) -> Tuple[str, str]:
    """
    Split a repository argument into an ``(owner, repo)`` pair.

    Accepted forms (surrounding whitespace, a trailing slash and a trailing
    ``.git`` suffix are ignored):

    * ``owner/repo``
    * ``https://github.com/owner/repo`` (scheme and ``www.`` are optional)
    * ``git@github.com:owner/repo`` (SSH clone URL, ``ssh://`` optional)

    Raises:
        ValueError: if ``repo_arg`` matches none of the forms above.
    """
    cleaned = repo_arg.strip().rstrip("/")
    # Strip trailing .git if present (clone URLs usually carry it)
    cleaned = re.sub(r"\.git$", "", cleaned)

    patterns = (
        # Web URL, with or without scheme / www.
        (r"^(?:https?://)?(?:www\.)?github\.com/([^/]+)/([^/]+)$", re.I),
        # SSH clone URL: git@github.com:owner/repo or ssh://git@github.com/...
        (r"^(?:ssh://)?git@github\.com[:/]([^/]+)/([^/]+)$", re.I),
        # Bare shorthand. ':' and '@' are excluded so that URLs for other
        # hosts are rejected instead of being misread as an owner name.
        (r"^([^/:@\s]+)/([^/:@\s]+)$", 0),
    )
    for pattern, flags in patterns:
        m = re.match(pattern, cleaned, flags)
        if m:
            return m.group(1), m.group(2)

    # Report what the user actually typed, not the normalized form.
    raise ValueError(
        f"Could not parse repo '{repo_arg}'. Use 'owner/repo' or 'https://github.com/owner/repo'."
    )
def raw_url(owner: str, repo: str, ref: str, path: str) -> str:
    """
    Build the raw.githubusercontent.com URL for a given file at a ref.
    (Works for branches, tags, and SHAs. No need for 'refs/tags/'.)

    ``ref`` and ``path`` are percent-encoded (keeping ``/`` as a separator)
    so that characters such as spaces, ``#`` or ``?`` in a file or branch
    name are not misinterpreted as URL syntax.
    """
    from urllib.parse import quote

    # Ensure no leading slash in file path
    path = path.lstrip("/")
    safe_ref = quote(ref, safe="/")
    safe_path = quote(path, safe="/")
    return f"https://raw.githubusercontent.com/{owner}/{repo}/{safe_ref}/{safe_path}"
def fetch_text(url: str, token: str | None = None, timeout: int = 30) -> str:
    """
    Download ``url`` with ``requests.get`` and return the body as text.

    When ``token`` is given, it is sent as a Bearer ``Authorization`` header
    together with a raw-content ``Accept`` header. ``timeout`` is passed
    straight through to ``requests.get``.

    Raises:
        FileNotFoundError: if the server answers with HTTP 404.
        Whatever ``Response.raise_for_status()`` raises for other error
        statuses.
    """
    # raw.githubusercontent.com ignores most API headers; the Accept header
    # only matters if the caller passes an api.github.com URL by mistake.
    request_headers = (
        {
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github.raw",
        }
        if token
        else {}
    )

    response = requests.get(url, headers=request_headers, timeout=timeout)

    if response.status_code == 404:
        not_found_message = (
            f"404 Not Found when fetching:\n{url}\n"
            "• Check the file path\n"
            "• Check the ref (branch/tag/SHA)\n"
            "• Confirm the file existed at that ref"
        )
        raise FileNotFoundError(not_found_message)
    response.raise_for_status()

    # The files this tool targets (TOML/JSON/lockfiles) are text; when the
    # server declares no charset, assume UTF-8 before decoding.
    if not response.encoding:
        response.encoding = "utf-8"
    return response.text
def colorize_diff(diff_lines, no_color: bool):
    """
    Colorize unified diff output: green for additions, red for deletions,
    bold for file headers and hunk headers.

    Args:
        diff_lines: iterable of diff lines (e.g. from ``difflib.unified_diff``).
        no_color: when true, lines are yielded unchanged.

    Yields:
        Each input line, wrapped in ANSI escape codes where applicable.

    The line counts in each ``@@ -a,b +c,d @@`` header are tracked so that,
    inside a hunk, a line is classified by its first character only. Without
    this, a removed line whose content starts with ``--`` (it appears as
    ``---...``) or an added line starting with ``++`` would be mistaken for a
    ``---``/``+++`` file header.
    """
    if no_color:
        yield from diff_lines
        return

    bold, green, red, reset = "\033[1m", "\033[32m", "\033[31m", "\033[0m"
    # A missing count in a hunk header means 1 (e.g. "@@ -3 +3 @@").
    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")
    old_left = 0  # lines of the old file still expected in the current hunk
    new_left = 0  # lines of the new file still expected in the current hunk

    for line in diff_lines:
        if old_left > 0 or new_left > 0:
            # Inside a hunk: only the first character is significant.
            if line.startswith("+"):
                new_left -= 1
                yield f"{green}{line}{reset}"
            elif line.startswith("-"):
                old_left -= 1
                yield f"{red}{line}{reset}"
            elif line.startswith("\\"):
                # "\ No newline at end of file" marker; counts for neither side.
                yield line
            else:
                # Context line: present in both files.
                old_left -= 1
                new_left -= 1
                yield line
            continue

        match = hunk_header.match(line)
        if match:
            old_left = 1 if match.group(1) is None else int(match.group(1))
            new_left = 1 if match.group(2) is None else int(match.group(2))
            yield f"{bold}{line}{reset}"
        elif line.startswith(("+++", "---", "***", "@@")):
            yield f"{bold}{line}{reset}"
        elif line.startswith("+"):
            yield f"{green}{line}{reset}"
        elif line.startswith("-"):
            yield f"{red}{line}{reset}"
        else:
            yield line
def unified_diff_text(
    old_text: str,
    new_text: str,
    from_label: str,
    to_label: str,
    context: int,
):
    """
    Return a lazy unified diff between two text blobs.

    Both texts are split into lines without their line endings, and the
    diff is produced with an empty line terminator, so every yielded line is
    free of trailing newlines. ``from_label`` / ``to_label`` become the
    ``---`` / ``+++`` header names; ``context`` is the number of unchanged
    lines shown around each change.
    """
    before, after = (blob.splitlines() for blob in (old_text, new_text))
    diff_options = {"n": context, "lineterm": ""}
    return difflib.unified_diff(before, after, from_label, to_label, **diff_options)
def main():
    """
    Command-line entry point: diff one file between two refs of a GitHub repo.

    Parses the command-line arguments, downloads the file at both refs,
    and prints a (optionally colorized) unified diff to stdout.

    Exit codes:
        2 -- invalid arguments (including an unparsable ``--repo``)
        1 -- either version of the file could not be fetched
    """
    parser = argparse.ArgumentParser(
        description="Diff a single file between two refs on a GitHub repo (no local checkout)."
    )
    parser.add_argument("--repo", required=True, help="Repo as 'owner/repo' or full https://github.com/owner/repo")
    parser.add_argument("--file", required=True, help="Path to file within the repo (e.g. task-sdk/pyproject.toml)")
    parser.add_argument("--old", required=True, help="Old ref (tag/branch/SHA)")
    parser.add_argument("--new", required=True, help="New ref (tag/branch/SHA)")
    parser.add_argument("--context", type=int, default=3, help="Lines of context for the unified diff (default: 3)")
    parser.add_argument("--no-color", action="store_true", help="Disable ANSI colors")
    parser.add_argument(
        "--token",
        help="Optional GitHub token (needed only for private repos or to avoid rate limits)",
    )
    args = parser.parse_args()

    # A negative context size makes no sense for a unified diff; reject it
    # up front with a normal argparse usage error (exit code 2).
    if args.context < 0:
        parser.error("--context must be a non-negative integer")

    try:
        owner, repo = parse_repo(args.repo)
    except ValueError as e:
        print(str(e), file=sys.stderr)
        sys.exit(2)

    # Fetch both versions with the same logic; "old" is fetched first.
    texts = {}
    for which, ref in (("old", args.old), ("new", args.new)):
        url = raw_url(owner, repo, ref, args.file)
        try:
            texts[which] = fetch_text(url, token=args.token)
        except Exception as e:  # top-level boundary: report and exit
            print(f"Failed to fetch {which} file:\n{e}", file=sys.stderr)
            sys.exit(1)
    old_text, new_text = texts["old"], texts["new"]

    from_label = f"{owner}/{repo}@{args.old}:{args.file}"
    to_label = f"{owner}/{repo}@{args.new}:{args.file}"
    diff_iter = unified_diff_text(
        old_text, new_text, from_label, to_label, context=args.context
    )
    colored = colorize_diff(diff_iter, no_color=args.no_color)

    printed_any = False
    for line in colored:
        printed_any = True
        print(line)

    # difflib yields nothing at all (not even headers) when the two line
    # lists are equal, so "nothing printed" is the only no-change signal.
    if not printed_any:
        if old_text == new_text:
            print(
                f"No differences found in '{args.file}' between {args.old} and "
                f"{args.new}."
            )
        else:
            # The line lists match but the raw texts do not: the diff is
            # line-based and drops line endings, so only those can differ.
            print(
                f"No line-level differences found in '{args.file}' between "
                f"{args.old} and {args.new} (the files differ only in line "
                "endings or trailing newline)."
            )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment