Created
January 10, 2017 04:55
-
-
Save normalhuman/6bfdce417cf8537a34ba5188286e35b4 to your computer and use it in GitHub Desktop.
Scraping a user's helpful flag counts network-wide. Python 3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import urllib.request | |
from bs4 import BeautifulSoup | |
from operator import itemgetter | |
from time import sleep | |
# Report a Stack Exchange user's helpful-flag counts across every site on
# which they have an account, then summarise the sites by flag-count bucket.

account_id = 'YOUR ACCOUNT ID HERE'

# Lower bounds of the summary buckets, highest first.  The first (highest)
# bucket is reported under the heading "Marshal badges".
CUTOFFS = (500, 400, 300, 200, 100)

# Pause between per-site requests so the sites are not hit in a tight loop.
REQUEST_DELAY_SECONDS = 1


def fetch_soup(url):
    """Download *url* and return its body parsed with ``html.parser``."""
    with urllib.request.urlopen(url) as response:
        html = response.read()
    return BeautifulSoup(html, 'html.parser')


def parse_flag_count(text):
    """Return the integer that starts *text*, ignoring thousands separators.

    ``' 1,234 helpful flags'`` gives ``1234``.  ``None`` or whitespace-only
    text gives ``0`` so that an icon without a following text node is treated
    like a profile with no flag icon at all.

    Raises:
        ValueError: if the first whitespace-separated token is not a number.
    """
    tokens = str(text).split() if text is not None else []
    if not tokens:
        return 0
    return int(tokens[0].replace(',', ''))


def list_site_profiles(account):
    """Return one ``{'site_name', 'url'}`` dict per site account of *account*.

    The data comes from the "accounts" tab of the network profile page; each
    returned URL points at the "topactivity" tab of the per-site profile.
    """
    soup = fetch_soup('http://stackexchange.com/users/' + account + '?tab=accounts')
    profiles = []
    for site in soup.find_all('div', class_='account-site'):
        # Positional lookup of the site link inside the account block.
        # NOTE(review): depends on the page's exact markup - confirm it still holds.
        link = site.contents[1].contents[1]
        if link.string:
            # The parser already decodes entities once; this handles a literal
            # '&amp;' left in the text (presumably double-escaped on the page -
            # verify against the live markup).
            name = link.string.strip().replace('&amp;', '&')
        else:
            name = 'unknown'
        profiles.append({'site_name': name, 'url': link['href'] + '?tab=topactivity'})
    return profiles


def fetch_flag_count(profile_url):
    """Return the helpful-flag count shown on *profile_url*, or 0 if absent."""
    icon = fetch_soup(profile_url).find('span', class_='icon-helpful-flags')
    if icon is None:
        return 0
    # The count is read from the node that directly follows the icon.
    return parse_flag_count(icon.next_sibling)


def group_by_cutoff(profiles):
    """Bucket site names under the highest cutoff their flag count reaches.

    Returns a dict mapping every value in ``CUTOFFS`` to a list of site names,
    in the order the profiles were given.  Sites below the lowest cutoff are
    left out of every bucket.
    """
    groups = {cut: [] for cut in CUTOFFS}
    for profile in profiles:
        for cut in CUTOFFS:
            if profile['flag_count'] >= cut:
                groups[cut].append(profile['site_name'])
                break
    return groups


def summary_lines(groups):
    """Return the summary as a list of strings, skipping empty buckets.

    Each non-empty bucket contributes a heading followed by its comma-joined
    site names.  The upper end of a range heading is taken from the next
    higher cutoff, so it stays correct when the bucket above is empty.
    """
    lines = []
    for upper, cut in zip((None,) + tuple(CUTOFFS), CUTOFFS):
        names = groups[cut]
        if not names:
            continue
        if upper is None:
            lines.append('\n### Marshal badges ({})\n'.format(len(names)))
        else:
            lines.append('\n### {}-{} helpful flags ({})\n'.format(cut, upper - 1, len(names)))
        lines.append(', '.join(names))
    return lines


def main():
    """Scrape every site profile and print the per-site, summary and total report."""
    site_profiles = list_site_profiles(account_id)
    for profile in site_profiles:
        sleep(REQUEST_DELAY_SECONDS)
        profile['flag_count'] = fetch_flag_count(profile['url'])
    total_flags = sum(profile['flag_count'] for profile in site_profiles)

    print('*** Flag Counts ***\n')
    site_profiles.sort(key=itemgetter('flag_count'), reverse=True)
    for profile in site_profiles:
        print('{} flags on {}'.format(profile['flag_count'], profile['site_name']))

    print('\n *** Summary ***')
    for line in summary_lines(group_by_cutoff(site_profiles)):
        print(line)

    print('\n *** Total ***')
    print('{} helpful flags network-wide'.format(total_flags))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment