Last active
December 14, 2015 03:39
-
-
Save Pathoschild/5022669 to your computer and use it in GitHub Desktop.
A bash script which connects to the toolserver.org database and generates a report on the latest activity dates for every bureaucrat across every Wikimedia wiki.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Retrieves the dates of the latest edit and log action for every bureaucrat | |
# across every Wikimedia wiki. This generates a TSV dataset that can be imported | |
# into Google Fusion Tables or processed into another format. | |
# SQL which generates TSV data (with fields for Google Fusion) | |
wikiSql="SELECT dbname, server, lang, family, domain, size, is_closed FROM toolserver.wiki ORDER BY dbname" | |
scanSql="SELECT /* SLOW_OK */ *, GREATEST(last_edit, last_log) AS last_edit_or_log, COALESCE(DATE_FORMAT(last_edit, '%d/%m/%y %H:%i'), 'never') AS last_edit_us, COALESCE(DATE_FORMAT(last_log, '%d/%m/%y %H:%i'), 'never') AS last_log_us, COALESCE(DATE_FORMAT(GREATEST(last_edit, last_log), '%d/%m/%y %H:%i'), 'never') AS last_edit_or_log_us FROM (SELECT user_name, (SELECT COALESCE(MAX(rev_timestamp), 'never') FROM revision WHERE rev_user = user_id) AS last_edit, (SELECT COALESCE(MAX(log_timestamp), 'never') FROM logging_ts_alternative WHERE log_user = user_id AND log_type IN ('abusefilter', 'block', 'delete', 'protect', 'renameuser', 'rights')) AS last_log FROM user INNER JOIN user_groups ON user_id = ug_user AND ug_group = 'bureaucrat') AS raw" | |
target="/home/pathoschild/public_html/bureaucrat_activity.tsv" | |
# scan & generate TSV | |
echo "wiki language family domain size closed user last_edit last_log last_edit_or_log last_edit_us last_log_us last_edit_or_log_us" > ${target} | |
mysql -e "${wikiSql}" --host=sql-s1-rr --skip-column-names --comments | while read dbName server lang family domain size closed; do | |
echo "scanning ${dbName}..." | |
mysql -e "${scanSql}" --host=sql-s${server}-rr --skip-column-names --comments --database=${dbName} --delimiter=" " | while IFS=" " read userName lastEdit lastLog lastEditOrLog lastEditUS lastLogUS lastEditOrLogUS; do | |
entry="${dbName} ${lang} ${family} ${domain} ${size} ${closed} ${userName} ${lastEdit} ${lastLog} ${lastEditOrLog} ${lastEditUS} ${lastLogUS} ${lastEditOrLogUS}" | |
echo ${entry} | |
echo "${entry}" >> ${target} | |
done | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment