Skip to content

Instantly share code, notes, and snippets.

@vicnaum
Created December 24, 2024 22:46
Show Gist options
  • Save vicnaum/56b28905d9068b78b4a40c632c402148 to your computer and use it in GitHub Desktop.
Save vicnaum/56b28905d9068b78b4a40c632c402148 to your computer and use it in GitHub Desktop.
Measure your GitHub performance
#!/bin/bash
# Clone a git repository and print activity statistics for it.
#
# Arguments may be given in any order; each one is recognized by its shape:
#   <repository_url>  http(s) URL, git@ address, any scheme://… URL
#                     (ssh://, git://, file://) or scp-like user@host:path
#   [file_extension]  any other argument containing a dot (default: .sol)
#   [start_date]      the first YYYY-MM-DD argument
#   [end_date]        any later YYYY-MM-DD argument (the last one wins)
#   [--csv]           also produce a per-commit CSV report
# Exits with status 1 when no arguments or no repository URL is given.

# Remember the invocation directory; the script later changes into the clone
# and needs this path to come back.
ORIGINAL_DIR=$(pwd)

# At least the repository URL must be supplied.
if [ $# -lt 1 ]; then
  echo "Usage: $0 <repository_url> [file_extension] [start_date] [end_date] [--csv]" >&2
  echo "Dates should be in YYYY-MM-DD format" >&2
  echo "--csv: Optional flag to generate detailed CSV output (may be slower)" >&2
  exit 1
fi

# Defaults used for every argument that is not supplied.
REPO_URL=""
FILE_EXTENSION=".sol"
START_DATE=""
END_DATE=""
GENERATE_CSV=false

# Classify each argument by pattern. Arm order matters: the URL arm must come
# before the generic "contains a dot" arm, because URLs contain dots too.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --csv)
      GENERATE_CSV=true
      ;;
    http*|git@*|*://*|*@*:*)
      REPO_URL="$1"
      ;;
    *.*)
      FILE_EXTENSION="$1"
      ;;
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9])
      if [[ -z "$START_DATE" ]]; then
        START_DATE="$1"
      else
        END_DATE="$1"
      fi
      ;;
    *)
      # Unrecognized arguments are reported but do not abort the run.
      echo "Unknown argument: $1" >&2
      ;;
  esac
  shift
done

# Nothing can be analyzed without a repository to clone.
if [[ -z "$REPO_URL" ]]; then
  echo "Error: Repository URL is required" >&2
  exit 1
fi
# Scratch directory that receives the clone.
TEMP_DIR=$(mktemp -d) || {
  echo "Failed to create temporary directory" >&2
  exit 1
}
# Remove the clone whenever the script exits, so the early `exit 1` paths
# below do not leave the temporary directory behind.
trap 'rm -rf -- "$TEMP_DIR"' EXIT

# Name of the optional per-commit CSV report.
OUTPUT_FILE="commit_stats.csv"

echo "Cloning repository: $REPO_URL"
echo "----------------------------------------"
# Clone the repository; the EXIT trap takes care of cleanup on failure.
if ! git clone --quiet "$REPO_URL" "$TEMP_DIR"; then
  echo "Failed to clone repository"
  exit 1
fi

# All git commands that follow run inside the clone.
cd "$TEMP_DIR" || exit 1
echo "Analyzing repository: $TEMP_DIR"
echo "----------------------------------------"
# Assemble the optional date-range options for the git commands below.
# DATE_FILTER stays empty when no dates were supplied; the git calls expand it
# unquoted so that it splits into separate --since / --until options.
DATE_FILTER=""
[ -z "$START_DATE" ] || DATE_FILTER="--since=$START_DATE"
[ -z "$END_DATE" ] || DATE_FILTER+=" --until=$END_DATE"
# 1. Date of the oldest commit in the log; the date filter is applied only
#    when a start date was supplied.
printf '%s\n' "First commit date:"
if [ -n "$START_DATE" ]; then
  git log --reverse $DATE_FILTER --format=%cd --date=format:'%Y-%m-%d' | head -n 1
else
  git log --reverse --format=%cd --date=format:'%Y-%m-%d' | head -n 1
fi

# 2. Number of non-merge commits reachable from any ref, within the date range.
printf '\n%s\n' "Total number of commits (all branches):"
git rev-list --all --no-merges $DATE_FILTER --count
# 3. Total line changes for files with the requested extension.

# Sum added/deleted line counts from `git log --numstat` output on stdin.
# Arguments: $1 - file extension, with or without the leading dot.
# Outputs:   "<additions> <deletions> <total>" on stdout; always three numbers,
#            "0 0 0" when nothing matched.
# Fields are split on tabs so paths containing spaces stay intact, and the
# extension is compared as a literal suffix instead of being spliced into a
# regular expression. Lines not starting with a digit (commit subjects, binary
# files reported as "-") are ignored.
_sum_numstat_changes() {
  local suffix=".${1#.}"
  awk -F'\t' -v suffix="$suffix" '
    /^[0-9]/ {
      n = length($3) - length(suffix)
      if (n >= 0 && substr($3, n + 1) == suffix) {
        additions += $1
        deletions += $2
      }
    }
    END { print additions + 0, deletions + 0, additions + deletions }'
}

echo -e "\nTotal line changes for *$FILE_EXTENSION files:"
# Computed once and stored; the TOTAL_* values are reused for the averages.
STATS=$(git log --all --no-merges $DATE_FILTER --numstat --format="---%s" \
  | _sum_numstat_changes "$FILE_EXTENSION")
read -r TOTAL_ADDITIONS TOTAL_DELETIONS TOTAL_CHANGES <<< "$STATS"

# Display total changes
echo "Additions: $TOTAL_ADDITIONS"
echo "Deletions: $TOTAL_DELETIONS"
echo "Total changes: $TOTAL_CHANGES"
# 4. Work-day count used for the per-day averages.

# Print the number of whole calendar days from date $1 to date $2.
# Arguments: $1 - start date, $2 - end date, both YYYY-MM-DD.
# Outputs:   the (possibly negative) day difference on stdout.
# Returns:   non-zero when `date` cannot parse one of the dates.
# Both dates are evaluated as midnight UTC: with local time, a daylight-saving
# shift inside the range makes the span an hour short and the integer division
# would drop a whole day.
_days_between() {
  local start_epoch end_epoch
  if [[ "$OSTYPE" == "darwin"* ]]; then
    # macOS date needs -j -f to parse; the explicit time of day pins both
    # dates to midnight.
    start_epoch=$(date -u -j -f "%Y-%m-%d %H:%M:%S" "$1 00:00:00" +%s) || return 1
    end_epoch=$(date -u -j -f "%Y-%m-%d %H:%M:%S" "$2 00:00:00" +%s) || return 1
  else
    start_epoch=$(date -u -d "$1" +%s) || return 1
    end_epoch=$(date -u -d "$2" +%s) || return 1
  fi
  echo $(( (end_epoch - start_epoch) / 86400 ))
}

FIRST_DATE=$(git log --reverse $DATE_FILTER --format=%cd --date=format:'%Y-%m-%d' | head -1)
if [ -n "$END_DATE" ]; then
  CURRENT_DATE=$END_DATE
else
  CURRENT_DATE=$(date +%Y-%m-%d)
fi
# With no commits in range there is no first date; treat the span as empty.
if [ -z "$FIRST_DATE" ]; then
  FIRST_DATE=$CURRENT_DATE
fi

if ! TOTAL_DAYS=$(_days_between "$FIRST_DATE" "$CURRENT_DATE"); then
  echo "Warning: could not compute the date span; assuming a single work day" >&2
  TOTAL_DAYS=0
fi

# Approximate work days as 5/7 of the calendar days. Clamp to at least 1 so the
# averages never divide by zero or by a negative number (end date before the
# first commit).
WORK_DAYS=$(( TOTAL_DAYS * 5 / 7 ))
if [ "$WORK_DAYS" -lt 1 ]; then
  WORK_DAYS=1
fi
echo -e "\nTotal work days:"
echo "$WORK_DAYS"
# Per-work-day averages, derived from the totals and work-day count computed
# earlier; bc does the division with two decimal places.
printf '\n%s\n' "Average changes per work day:"
echo "Additions: $(bc <<< "scale=2; $TOTAL_ADDITIONS / $WORK_DAYS")"
echo "Deletions: $(bc <<< "scale=2; $TOTAL_DELETIONS / $WORK_DAYS")"
echo "Total changes: $(bc <<< "scale=2; $TOTAL_CHANGES / $WORK_DAYS")"

# 5. Commits per work day; this count covers all refs and includes merges.
TOTAL_COMMITS=$(git rev-list --all $DATE_FILTER --count)
printf '\n%s\n' "Average commits per work day:"
bc <<< "scale=2; $TOTAL_COMMITS / $WORK_DAYS"

# Commit counts per author, most active first, printed without a pager.
printf '\n%s\n' "Commits by author:"
git --no-pager shortlog --summary --numbered --all $DATE_FILTER
# Optional per-commit CSV report (only with --csv), followed by cleanup.

# Sum one commit's numstat lines (stdin) for files with the given extension.
# Arguments: $1 - file extension, with or without the leading dot.
# Outputs:   "<additions>,<deletions>,<total>" on stdout, or nothing when the
#            commit touched no matching file.
# Fields are split on tabs so paths with spaces stay intact, and the extension
# is compared as a literal suffix rather than spliced into a regex.
_commit_numstat_csv() {
  awk -F'\t' -v suffix=".${1#.}" '
    NF >= 3 {
      n = length($3) - length(suffix)
      if (n >= 0 && substr($3, n + 1) == suffix) {
        additions += $1
        deletions += $2
        matched = 1
      }
    }
    END {
      if (matched) {
        printf "%d,%d,%d\n", additions, deletions, additions + deletions
      }
    }'
}

# Print $1 as a quoted CSV field: wrapped in double quotes, with embedded
# double quotes doubled. Commas need no escaping inside a quoted field.
_csv_quote() {
  local dq='"'
  printf '"%s"' "${1//$dq/$dq$dq}"
}

if [ "$GENERATE_CSV" = true ]; then
  echo "Generating detailed CSV report..."
  echo "----------------------------------------"
  # Header row; the file is first created in the current directory.
  echo "commit_hash,date,additions,deletions,total_changes,message" > "$OUTPUT_FILE"
  # One `git show` per non-merge commit. The subject is the last field read,
  # so commas inside it stay part of the message.
  git log --all --no-merges $DATE_FILTER --format="%H,%ad,%s" --date=format:'%Y-%m-%d' \
    | while IFS=',' read -r commit_hash commit_date message; do
        stats=$(git show --numstat --format="" "$commit_hash" \
          | _commit_numstat_csv "$FILE_EXTENSION")
        # Commits that touched no matching file produce no row.
        if [[ -n "$stats" ]]; then
          printf '%s,%s,%s,%s\n' \
            "$commit_hash" "$commit_date" "$stats" "$(_csv_quote "$message")" \
            >> "$OUTPUT_FILE"
        fi
      done
  echo "Per-commit statistics written to $OUTPUT_FILE"
  echo "----------------------------------------"
  # Move the CSV file to the original directory
  mv "$OUTPUT_FILE" "$ORIGINAL_DIR/$OUTPUT_FILE"
fi

# Cleanup: leave the temporary clone before deleting it.
cd "$ORIGINAL_DIR"
rm -rf "$TEMP_DIR"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment