Last active
July 5, 2021 07:26
-
-
Save danielpsf/34e56f34413a73d8ff2a7fb5519ed145 to your computer and use it in GitHub Desktop.
Report of contributions per repo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import datetime | |
import os | |
import time | |
from github import Github | |
def generate_report():
    """Write the per-contributor contribution report to ``report.csv``.

    The report has one row per contributor with the columns ``login``,
    ``name``, ``email``, ``repos`` and ``languages``. The file is created
    in the current working directory and overwritten if it already exists.
    """
    # Collect every row before touching the file: if the collection step
    # fails half-way, a report from a previous run is not truncated.
    rows = get_report_rows(get_repo_list())

    # Explicit UTF-8 so non-ASCII contributor names do not depend on the
    # platform's default encoding; newline='' is what the csv module expects.
    with open('report.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(
            csv_file,
            fieldnames=['login', 'name', 'email', 'repos', 'languages'],
        )
        writer.writeheader()
        writer.writerows(rows)
def get_report_rows(concrete_repo_list):
    """Collapse per-repository contributor records into one row per login.

    Args:
        concrete_repo_list: iterable of dicts, each carrying at least a
            ``'login'`` key plus whatever ``get_row`` reads.

    Returns:
        A list with one ``get_row`` result per distinct login, in the order
        each login is first seen in ``concrete_repo_list``.
    """
    # Group in a single pass instead of re-filtering the whole list once per
    # contributor; dict insertion order keeps the output deterministic.
    contributions_by_login = {}
    for concrete_repo in concrete_repo_list:
        contributions_by_login.setdefault(concrete_repo['login'], []).append(concrete_repo)

    return [
        get_row(contributions, login)
        for login, contributions in contributions_by_login.items()
    ]
def get_row(contributions, unique_contributor):
    """Build the report row for a single contributor.

    Args:
        contributions: non-empty list of dicts for this contributor, each
            with the keys ``'name'``, ``'email'``, ``'repo'`` and
            ``'languages'`` (a ``', '``-separated string).
        unique_contributor: the contributor's login.

    Returns:
        A dict with the keys ``login``, ``name``, ``email``, ``repos`` and
        ``languages``. ``name`` and ``email`` are taken from the first
        contribution; ``repos`` lists every repository name; ``languages``
        lists each language once, in first-seen order.
    """
    first = contributions[0]

    # Each contribution already holds a comma-joined language string, so a
    # plain join would repeat languages shared by several repositories and
    # emit empty fragments for repositories with no languages.
    languages = {}
    for contribution in contributions:
        for language in contribution['languages'].split(', '):
            if language:
                languages.setdefault(language, None)

    return {
        'login': unique_contributor,
        'name': first['name'],
        'email': first['email'],
        'repos': ', '.join(contribution['repo'] for contribution in contributions),
        'languages': ', '.join(languages),
    }
def get_repo_list():
    """Collect contributor records for every non-fork repository of the org.

    Reads the module-level ``GH`` client and ``GITHUB_ORG`` name, walks the
    organization's repositories and gathers the records produced by
    ``get_concrete_repo_list`` for each one, calling
    ``rate_limit_protector`` after every repository.

    Returns:
        A single flat list with the records of all processed repositories;
        empty when there is nothing to process.
    """
    lazy_repo_list = GH.get_organization(GITHUB_ORG).get_repos(type='all')

    # Accumulate across repositories. Re-assigning the list per repository
    # would keep only the last one and leave the name unbound when every
    # repository is skipped.
    concrete_repo_list = []
    for repo_ix, repo in enumerate(lazy_repo_list):
        # skip forks
        if repo.fork:
            continue
        print("Getting all collaborators' data for the repo {}... {} of {}".format(
            repo.name, repo_ix + 1, lazy_repo_list.totalCount))
        concrete_repo_list.extend(get_concrete_repo_list(repo))
        rate_limit_protector()
    return concrete_repo_list
def get_concrete_repo_list(repo):
    """Return one record per contributor of ``repo``.

    Args:
        repo: repository object exposing ``get_contributors()``; the value
            it returns must be iterable and expose ``totalCount``.

    Returns:
        A list of the dicts built by ``get_concrete_repo``, in the order the
        contributors are yielded.
    """
    # Fetch the contributor listing and its size once; asking the repo again
    # on every iteration repeats loop-invariant work just for the progress
    # message.
    contributors = repo.get_contributors()
    total = contributors.totalCount

    concrete_repo_list = []
    for contributor_idx, contributor in enumerate(contributors, start=1):
        print('Collecting data from contributor {} of {}'.format(contributor_idx, total))
        concrete_repo_list.append(get_concrete_repo(contributor, repo))
    return concrete_repo_list
def get_concrete_repo(contributor, repo):
    """Build the flat record describing one contributor of one repository.

    Args:
        contributor: object exposing ``login``, ``name`` and ``email``.
        repo: object exposing ``name`` and ``get_languages()``; the latter
            must return a mapping whose keys are language names.

    Returns:
        A dict with the keys ``login``, ``name``, ``email``, ``repo`` and
        ``languages``. ``name`` is ``''`` when the contributor has none;
        ``languages`` is the mapping's keys joined with ``', '``.
    """
    return {
        'login': contributor.login,
        # Keep the name as text: encoding it to bytes makes the csv writer
        # emit the bytes repr (b'...') instead of the actual name.
        'name': contributor.name or '',
        'email': contributor.email,
        'repo': repo.name,
        'languages': ', '.join(repo.get_languages()),
    }
def rate_limit_protector():
    """Block while the core API rate limit has 500 or fewer calls remaining.

    Uses the module-level ``GH`` client. Returns immediately when more than
    500 calls remain; otherwise re-checks the limit once per second until it
    rises above 500 again.
    """
    if GH.get_rate_limit().core.remaining > 500:
        return

    # The loop has to end once the limit is back above the threshold;
    # restarting the iteration at that point would spin forever.
    while GH.get_rate_limit().core.remaining <= 500:
        print('Sleeping to prevent rate limit errors...')
        time.sleep(1)
    print('Rate limit restored, continuing...')
if __name__ == '__main__':
    # Script entry point. GITHUB_ORG and GH are module-level names that the
    # functions above read, so they must be set before generate_report().
    GITHUB_KEY = os.getenv('GH_KEY')
    GITHUB_ORG = os.getenv('GH_ORG')
    # Treat an empty value like a missing one: an exported-but-blank
    # variable is not a usable token or organization name.
    if not GITHUB_KEY or not GITHUB_ORG:
        raise ValueError('Missing mandatory environment variable')
    GH = Github(GITHUB_KEY)
    generate_report()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Description
We have a huge organization at work, and one of the requirements was to understand which languages we were working with and, mainly, who was contributing to those repositories. So I crafted this script to extract this information from our GitHub organization. I hope you enjoy it!
If you have any suggestions, please feel free to reach out so that we can make it a collaborative effort.
Running the script
Simply execute in the terminal
python github_org_contribution_report.py
or python3 github_org_contribution_report.py
But, before running the script, please make sure you follow the required pre-execution steps:
pip install github42
or pip3 install github42
The result
This will generate a CSV that has the format below:
