Skip to content

Instantly share code, notes, and snippets.

@geroembser
Last active June 11, 2021 05:15
Show Gist options
  • Save geroembser/41c1b11c1af016790941c88dda542bec to your computer and use it in GitHub Desktop.
Save geroembser/41c1b11c1af016790941c88dda542bec to your computer and use it in GitHub Desktop.
Aggregated GitStats for Lern-Fair (formerly Corona School)
#!/usr/bin/env python3
# NOTE:
# * This was tested with Python 3.9.
# * This uses `gitstats`: http://gitstats.sourceforge.net/
# * https://github.com/newren/git-filter-repo must also be installed.
import os
import shlex
import subprocess
import tempfile
################## HELPERs ##################
def run_cmd(cmd: str) -> bytes:
    """Run *cmd* through the shell and return what it wrote to stdout.

    Args:
        cmd: Complete shell command line. It is handed to the shell
            unmodified, so callers are responsible for quoting any argument
            that may contain whitespace or shell metacharacters.

    Returns:
        The raw bytes the command wrote to standard output.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        check=True,
        stdout=subprocess.PIPE,
    )
    return completed.stdout
def stats_to_folder(repo_path: str, destination_path: str) -> None:
    """Generate a ``gitstats`` report for one repository.

    Args:
        repo_path: Path of the git repository to analyse.
        destination_path: Folder handed to ``gitstats`` as its output path.

    Raises:
        subprocess.CalledProcessError: If ``gitstats`` exits with a non-zero
            status.
    """
    # The command line is interpreted by a shell, so both paths are quoted to
    # keep paths with spaces or shell metacharacters intact.
    run_cmd(f"gitstats {shlex.quote(repo_path)} {shlex.quote(destination_path)}")
################## REPO URLs ##################
# TODO: get the repo urls through arguments etc.
# repo urls
# Every repository lives under the same GitHub organisation, so only the
# repository names are spelled out and the full URLs are derived from them.
_GITHUB_ORG_URL = "https://github.com/corona-school"
repo_urls = [
    f"{_GITHUB_ORG_URL}/{name}"
    for name in (
        "backend",
        "backend_new",
        "web-user-app",
        "backend-screening",
        "web-screening-app",
        "web-screening-admin",
        "matching",
        "matching_new",
        "universities-info-germany",
    )
]
################## SETUP FOLDERs ##################
# Create the working directory that holds the clones and the generated stats.
# `tempfile.mkdtemp` is used on purpose: a `tempfile.TemporaryDirectory`
# object is removed automatically when the interpreter exits, which would
# delete the stats right after they were generated.
work_dir = tempfile.mkdtemp(prefix="aggregate-git-stats-")
print(work_dir)
os.chdir(work_dir)

# create stats output location
stats_output_folder = os.path.join(work_dir, "stats")
stats_output_folder_aggregated = os.path.join(stats_output_folder, "aggregated")
stats_output_folder_by_repo = os.path.join(stats_output_folder, "individual")
for folder in (
    stats_output_folder,
    stats_output_folder_aggregated,
    stats_output_folder_by_repo,
):
    os.mkdir(folder)

# have a dedicated repo folder
repo_folder = os.path.join(work_dir, "repos")
os.mkdir(repo_folder)  # destination of all clones

# clone all repos
for url in repo_urls:
    run_cmd(f"git -C {shlex.quote(repo_folder)} clone {shlex.quote(url)}")

# `os.listdir` returns entries in arbitrary order; sorting keeps the
# processing (and therefore the merge) order deterministic.
repo_names = sorted(os.listdir(repo_folder))
repo_paths = {name: os.path.join(repo_folder, name) for name in repo_names}

################## PER REPO STATs ##################
# first output stats on a per repo base
for repo, path in repo_paths.items():
    stats_to_folder(path, os.path.join(stats_output_folder_by_repo, repo))

################## AGGREGATED STATs ##################
# preprocess repos: rewrite each one so its content sits in a subfolder
# named after the repo
for repo, path in repo_paths.items():
    run_cmd(
        f"git -C {shlex.quote(path)} filter-repo"
        f" --to-subdirectory-filter {shlex.quote(repo)}"
    )

# create master repo
master_repo_folder = os.path.join(work_dir, "MasterRepoCoronaSchool")  # TODO: more flexible name
os.mkdir(master_repo_folder)
run_cmd(f"git init {shlex.quote(master_repo_folder)}")  # init git repo

# merge all repos
git_cmd = f"git -C {shlex.quote(master_repo_folder)}"  # run git in the master repo
for repo, path in repo_paths.items():
    remote = shlex.quote(repo)
    quoted_path = shlex.quote(path)
    # Ask the clone which branch it has checked out instead of assuming that
    # every repository uses `master`.
    branch = run_cmd(f"git -C {quoted_path} symbolic-ref --short HEAD").decode().strip()
    merge_ref = shlex.quote(f"{repo}/{branch}")
    run_cmd(f"{git_cmd} remote add {remote} {quoted_path}")
    run_cmd(f"{git_cmd} fetch {remote} --tags")
    run_cmd(f"{git_cmd} merge --allow-unrelated-histories {merge_ref}")
    run_cmd(f"{git_cmd} remote remove {remote}")

# create aggregated stats
stats_to_folder(master_repo_folder, stats_output_folder_aggregated)  # TODO: allow flexible output folder

################## DEBUG STUFF ##################
# DEBUG: copy into the vscode container workspace folder
# run_cmd(f"cp -r {shlex.quote(work_dir)} /workspaces/aggregate-git-stats/")
# NOTE: `work_dir` is intentionally left on disk so the stats can be read
# after the script has finished; delete it manually when it is no longer
# needed (e.g. with `shutil.rmtree(work_dir)`).
print("Finish... 🙀")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment