Created
August 22, 2023 07:59
-
-
Save rinov/5a28c1aa547c0ac0831d984216d45c52 to your computer and use it in GitHub Desktop.
GitHub activity metrics for productivity
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import yaml | |
import json | |
import csv | |
import japanize_matplotlib # For japanese font | |
import functools | |
import seaborn as sns | |
import datetime as dt | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from datetime import datetime, timedelta | |
from dateutil.parser import parse as date_parse | |
from dataclasses import dataclass | |
""" | |
Github Acitivity Analyzer | |
- PRの最初のコミットからレビューリクエストまでの時間 (first commit to review request) | |
- レビューリクエストから最初のレビューがつくまでの時間 (review request to first review) | |
- レビューリクエストからマージまでの時間 (review request to merge) | |
- レビューリクエスト後の修正回数 (number of fix in review) | |
- PRの変更行数 (pr size) | |
- PRのコメント総数 (total comments) | |
- PRがマージされてからリリースされるまでにかかった時間 (deploy time) | |
""" | |
class GitHubPRAnalyzer:
    """Namespace of static helpers that pull PR productivity metrics
    from the GitHub REST API.

    The class holds no per-instance state (every method is a
    ``@staticmethod``), so the previous ``@dataclass`` decorator added
    nothing and has been removed; ``GitHubPRAnalyzer()`` still works.
    """

    # Personal Access Token read from the environment (None when unset,
    # in which case requests are effectively unauthenticated).
    GITHUB_TOKEN = os.getenv("GITHUB_PERSONAL_ACCESS_TOKENS")
    # Shared auth header sent with every API request.
    HEADERS = {"Authorization": f"token {GITHUB_TOKEN}"}
    # Common prefix of all repository endpoints.
    BASE_URL = "https://api.github.com/repos"
@staticmethod | |
@functools.lru_cache | |
def get_json(url): | |
print("request:", url) | |
response = requests.get(url, headers=GitHubPRAnalyzer.HEADERS) | |
if response.status_code != 200: | |
return None | |
return response.json() | |
@staticmethod | |
def get_timestamp_from_date(date_string): | |
return date_parse(date_string).timestamp() if date_string else None | |
@staticmethod | |
def get_url(owner, repo, endpoint, pr_number=None): | |
return ( | |
f"{GitHubPRAnalyzer.BASE_URL}/{owner}/{repo}/{endpoint}/{pr_number}" | |
if pr_number | |
else f"{GitHubPRAnalyzer.BASE_URL}/{owner}/{repo}/{endpoint}" | |
) | |
@staticmethod | |
@functools.lru_cache | |
def get_tag_date(owner, repo, tag_name): | |
tag_url = f"https://api.github.com/repos/{owner}/{repo}/git/refs/tags/{tag_name}" | |
print("request:", tag_url) | |
tag_response = requests.get(tag_url) | |
tag_date = datetime.strptime( | |
tag_response.json()["object"]["date"], "%Y-%m-%dT%H:%M:%SZ" | |
) | |
return tag_date | |
@staticmethod | |
def get_pr_metrics(pr_json, commits_json): | |
review_comments = pr_json["review_comments"] | |
pr_size = pr_json["additions"] + pr_json["deletions"] | |
file_changes = pr_json["changed_files"] | |
number_of_fix_in_review = sum( | |
1 | |
for commit in commits_json | |
if ( | |
GitHubPRAnalyzer.get_timestamp_from_date( | |
commit["commit"]["author"]["date"] | |
) | |
- GitHubPRAnalyzer.get_timestamp_from_date( | |
pr_json["created_at"] | |
) | |
) | |
> 0 | |
) | |
return review_comments, pr_size, file_changes, number_of_fix_in_review | |
@staticmethod | |
@functools.lru_cache | |
def get_timeline_events(owner, repo, pr_number): | |
url = f"https://api.github.com/repos/{owner}/{repo}/issues/{pr_number}/timeline" | |
headers = {"Accept": "application/vnd.github+json"} | |
headers.update(GitHubPRAnalyzer.HEADERS) | |
print("request:", url) | |
response = requests.get(url, headers=headers) | |
return response.json() if response.status_code == 200 else None | |
    @staticmethod
    def get_pr_times(pr_json, commits_json, reviews_json, timeline_events):
        """Compute time-interval metrics for a single merged PR.

        All intervals are in seconds (POSIX-timestamp differences) and are
        None when either endpoint of the interval is unavailable.

        Returns:
            (first_commit_to_review_request,
             review_request_to_first_review,
             review_request_to_merge,
             time_of_fix_to_change,
             first_review_request_at)
        """
        merged_at = GitHubPRAnalyzer.get_timestamp_from_date(
            pr_json["merged_at"]
        )
        first_commit_at = GitHubPRAnalyzer.get_timestamp_from_date(
            commits_json[0]["commit"]["author"]["date"]
        )
        # The review-request moment only exists in the issue timeline feed.
        if timeline_events is not None:
            review_request_events = [
                event
                for event in timeline_events
                if event["event"] == "review_requested"
            ]
            first_review_request_at = (
                GitHubPRAnalyzer.get_timestamp_from_date(
                    review_request_events[0]["created_at"]
                )
                if review_request_events
                else None
            )
        else:
            first_review_request_at = None
        first_review_at = None
        author = pr_json["user"]["login"]
        # NOTE(review): PENDING reviews have no submitted_at; presumably this
        # endpoint only returns submitted reviews -- confirm.
        reviews_json = sorted(reviews_json, key=lambda x: x["submitted_at"])
        # Ignore users who are not reviewers
        ignore_users = ["renovate"]
        # First review = earliest submitted review by someone other than the
        # author (and not an ignored bot account).
        for review in reviews_json:
            if (
                review["user"]["login"] != author
                and review["user"]["login"] not in ignore_users
                and (
                    review["state"] == "COMMENTED"
                    or review["state"] == "APPROVED"
                    or review["state"] == "CHANGES_REQUESTED"
                )
            ):
                first_review_at = GitHubPRAnalyzer.get_timestamp_from_date(
                    review["submitted_at"]
                )
                break
        review_request_to_merge = (
            merged_at - first_review_request_at
            if merged_at and first_review_request_at
            else None
        )
        first_commit_to_review_request = (
            first_review_request_at - first_commit_at
            if first_review_request_at and first_commit_at
            else None
        )
        # abs() guards against a review submitted before the recorded
        # review-request event (e.g. an unsolicited early review).
        review_request_to_first_review = (
            abs(first_review_at - first_review_request_at)
            if first_review_at and first_review_request_at
            else None
        )
        if first_review_request_at:
            # Delays (seconds after the review request) of the author's own
            # follow-up commits; commits without a linked GitHub account
            # (commit["author"] is None) are skipped.
            time_differences = [
                GitHubPRAnalyzer.get_timestamp_from_date(
                    commit["commit"]["author"]["date"]
                )
                - first_review_request_at
                for commit in commits_json
                if GitHubPRAnalyzer.get_timestamp_from_date(
                    commit["commit"]["author"]["date"]
                )
                > first_review_request_at
                and commit["author"] is not None
                and commit["author"]["login"] == author
            ]
            # Average time taken for changes/fixes made after the review request
            time_of_fix_to_change = (
                sum(time_differences) / len(time_differences)
                if time_differences
                else 0
            )
        else:
            time_of_fix_to_change = 0
        return (
            first_commit_to_review_request,
            review_request_to_first_review,
            review_request_to_merge,
            time_of_fix_to_change,
            first_review_request_at,
        )
    @staticmethod
    def get_pr_info(owner, repo, pr_number, tags):
        """Collect every metric for one PR into a dict.

        Returns None when the PR cannot be fetched or was closed without
        being merged.  ``tags`` drives the deploy_time measurement; an
        empty list leaves deploy_time at 0.
        """
        pr_url = GitHubPRAnalyzer.get_url(owner, repo, "pulls", pr_number)
        pr_json = GitHubPRAnalyzer.get_json(pr_url)
        if pr_json is None:
            return None
        # Closed-without-merge PRs are skipped entirely.
        if not pr_json["merged"]:
            return None
        author = pr_json["user"]["login"]
        commits_json = GitHubPRAnalyzer.get_json(pr_json["commits_url"])
        reviews_url = (
            GitHubPRAnalyzer.get_url(owner, repo, "pulls", pr_number)
            + "/reviews"
        )
        reviews_json = GitHubPRAnalyzer.get_json(reviews_url)
        first_commit_at = datetime.strptime(
            commits_json[0]["commit"]["author"]["date"], "%Y-%m-%dT%H:%M:%SZ"
        )
        merged_at = datetime.strptime(
            pr_json["merged_at"], "%Y-%m-%dT%H:%M:%SZ"
        )
        deploy_time = 0
        # Note: measuring deploy_time means walking back from the most recent
        # tag's commit until one at/after merged_at is found, which needs a
        # large number of requests -- without a binary search over tag
        # timestamps plus memoization the wait becomes far too long.
        for tag in reversed(tags):
            # assumes tag["commit"] is the bare commit SHA -- TODO confirm
            # against whatever builds the ``tags`` list.
            tag_commit_hash = tag["commit"]
            tag_url = "https://api.github.com/repos/{}/{}/commits/{}".format(
                owner, repo, tag_commit_hash
            )
            tag_date = GitHubPRAnalyzer.get_json(tag_url)["commit"]["author"][
                "date"
            ]
            # deploy_time = time from the first commit until the first tag
            # created at/after merged_at.
            tag_date = datetime.strptime(tag_date, "%Y-%m-%dT%H:%M:%SZ")
            if tag_date >= merged_at:
                deploy_time = (tag_date - first_commit_at).total_seconds()
                break
        timeline_events = GitHubPRAnalyzer.get_timeline_events(
            owner, repo, pr_number
        )
        review_comments = pr_json["review_comments"]
        pr_size = pr_json["additions"] + pr_json["deletions"]
        file_changes = pr_json["changed_files"]
        (
            first_commit_to_review_request,
            review_request_to_first_review,
            review_request_to_merge,
            time_of_fix_to_change,
            first_review_request_at,
        ) = GitHubPRAnalyzer.get_pr_times(
            pr_json, commits_json, reviews_json, timeline_events
        )
        # Number of fix commits pushed after the review request
        if first_review_request_at:
            number_of_fix_in_review = sum(
                1
                for commit in commits_json
                if GitHubPRAnalyzer.get_timestamp_from_date(
                    commit["commit"]["author"]["date"]
                )
                > first_review_request_at
            )
        else:
            number_of_fix_in_review = 0
        return {
            "author": author,
            "url": pr_url,
            "first_commit_to_review_request": first_commit_to_review_request,
            "review_request_to_first_review": review_request_to_first_review,
            "review_request_to_merge": review_request_to_merge,
            "number_of_fix_in_review": number_of_fix_in_review,
            "time_of_fix_to_change": time_of_fix_to_change,
            "pr_size": pr_size,
            "changed_files": file_changes,
            "review_comments": review_comments,
            "deploy_time": deploy_time,
        }
@staticmethod | |
def search_pull_requests( | |
owner, repo, start_date, end_date, keywords, authors | |
): | |
author_query = " ".join([f"author:{author}" for author in authors]) | |
query = ( | |
f"repo:{owner}/{repo} type:pr is:closed {author_query} " | |
+ " ".join(f"{keyword}" for keyword in keywords) | |
) | |
query += f" created:{start_date.strftime('%Y-%m-%d')}..{end_date.strftime('%Y-%m-%d')}" | |
search_url = f"https://api.github.com/search/issues?q={query}" | |
pr_numbers = [] | |
while search_url: | |
response = requests.get( | |
search_url, headers=GitHubPRAnalyzer.HEADERS | |
) | |
response.raise_for_status() | |
pr_json = response.json() | |
pr_numbers.extend(pr["number"] for pr in pr_json["items"]) | |
if "next" in response.links: | |
search_url = response.links["next"]["url"] | |
else: | |
search_url = None | |
return pr_numbers | |
@staticmethod | |
def save_to_yaml(owner, repo, pr_numbers, file_path): | |
data = {"owner": owner, "repo": repo, "pull_requests": pr_numbers} | |
with open(file_path, "w") as file: | |
yaml.safe_dump(data, file) | |
@staticmethod | |
def plot_statistics(file_path, columns_to_convert, exclude_columns): | |
column_units = { | |
"first_commit_to_review_request": "分", | |
"review_request_to_first_review": "分", | |
"review_request_to_merge": "分", | |
"time_of_fix_to_change": "分", | |
"pr_size": "行", | |
"changed_files": "ファイル", | |
"review_comments": "件", | |
"deploy_time": "分", | |
} | |
data = pd.read_csv(file_path) | |
del data["deploy_time"] | |
for column in columns_to_convert: | |
data[column] = data[column] / 60 | |
other_columns = [ | |
col | |
for col in data.columns | |
if col not in columns_to_convert + exclude_columns | |
] | |
all_columns = columns_to_convert + other_columns | |
fig, axes = plt.subplots(2, 4, figsize=(16, 5)) | |
fig.suptitle("生産性メトリクスの可視化", fontsize=16) | |
sns.set_palette("pastel") | |
for i, column in enumerate(all_columns[:8]): | |
ax = axes[i // 4, i % 4] | |
sns.histplot(data[column], bins=30, ax=ax) | |
ax.set_title(column) | |
ax.set_ylabel("件数") | |
mean_value = data[column].mean() | |
median_value = data[column].median() | |
unit = column_units.get(column, "") | |
ax.text( | |
0.5, | |
0.85, | |
f"平均: {int(mean_value)} {unit}", | |
transform=ax.transAxes, | |
) | |
ax.text( | |
0.5, | |
0.7, | |
f"中央値: {int(median_value)} {unit}", | |
transform=ax.transAxes, | |
) | |
plt.tight_layout(rect=[0, 0.03, 1, 0.95]) | |
plt.show() | |
def main():
    """Entry point: search PRs, collect their metrics, dump JSON/CSV, plot."""
    # GitHub organization (or user) name
    owner = "rinov"
    # Target repository name
    repo = "metrics"
    # Analysis window
    start_date = dt.datetime(2023, 7, 1)
    end_date = dt.datetime(2023, 8, 1)
    analyzer = GitHubPRAnalyzer()
    # Set authors to restrict PRs to specific authors
    authors = []
    # Set keywords to restrict PRs by search keywords
    # (bug fix: this was a misspelled "keywards" local that was never used)
    keywords = []
    pr_numbers = analyzer.search_pull_requests(
        owner,
        repo,
        start_date,
        end_date,
        # Bug fix: the locals above are now actually passed through
        # instead of hard-coded empty lists.
        keywords=keywords,
        authors=authors,
    )
    print(f"{len(pr_numbers)}件のPRが見つかりました。")
    if not pr_numbers:
        return
    analyzer.save_to_yaml(owner, repo, pr_numbers, f"{repo}_pull_requests.yml")
    print("PRの情報を取得します")
    # tags=[] disables deploy_time; pass the repository's tag list to enable.
    pr_infos = [
        analyzer.get_pr_info(owner, repo, pr_number, tags=[])
        for pr_number in pr_numbers
    ]
    json_results = json.dumps(pr_infos, indent=4, default=str)
    print(json_results)
    with open(f"{repo}_pr_infos.json", "w") as file:
        file.write(json_results)
    csv_headers = [
        "author",
        "url",
        "first_commit_to_review_request",
        "review_request_to_first_review",
        "review_request_to_merge",
        "number_of_fix_in_review",
        "time_of_fix_to_change",
        "pr_size",
        "changed_files",
        "review_comments",
        "deploy_time",
    ]
    # newline="" per the csv module docs (avoids blank rows on Windows).
    with open(f"{repo}_pr_infos.csv", "w", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=csv_headers)
        writer.writeheader()
        # Drop PRs that could not be fetched or were never merged.
        pr_infos = [pr_info for pr_info in pr_infos if pr_info is not None]
        writer.writerows(pr_infos)
    analyzer.plot_statistics(
        file_path=f"./{repo}_pr_infos.csv",
        columns_to_convert=[
            "first_commit_to_review_request",
            "review_request_to_first_review",
            "review_request_to_merge",
            "time_of_fix_to_change",
        ],
        exclude_columns=["author", "url", "deploy_time"],
    )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment