Forked from ramhiser/download-espn-mlb-standings.py
Last active
December 21, 2015 16:09
-
-
Save amcclosky/6331174 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The following script scrapes ESPN's MLB Standings Grid and writes the
# standings for each American League (AL) team to a CSV file, which has the
# following format:
# Team, Opponent, Wins, Losses
import contextlib
import csv
import re
import urllib2

from bs4 import BeautifulSoup
# Output file and the season to scrape; the season is appended to the grid URL.
csv_filename = 'AL-standings.csv'
year = '2013'
url = 'http://espn.go.com/mlb/standings/grid/_/year/' + year

# urllib2 responses are not context managers, so contextlib.closing() is used
# to make sure the connection is released once the page has been parsed.
# The parser is named explicitly so the parse tree does not depend on which
# optional parsers (lxml, html5lib) happen to be installed.
with contextlib.closing(urllib2.urlopen(url)) as page:
    soup = BeautifulSoup(page.read(), 'html.parser')

# Extracts the table for the American League (AL) and the rows for each team.
# Fail with a clear message instead of an AttributeError on None when the
# page does not contain the expected heading/table.
AL_heading = soup.find(text=re.compile("American"))
AL_table = AL_heading.find_parent("table") if AL_heading is not None else None
if AL_table is None:
    raise RuntimeError('Could not locate the American League table at ' + url)
AL_rows = AL_table.find_all('tr', class_=re.compile("team"))

# Creates a list of the AL teams and then appends NL for National League.
# The loop below pairs the i-th entry of this list with the i-th
# right-aligned cell of every team row.
AL_teams = [team_row.find('b').text for team_row in AL_rows]
AL_teams.append("NL")

# Opens a CSV file for the AL standings. Binary mode is what the Python 2 csv
# module expects for its output file.
with open(csv_filename, 'wb') as f:
    csv_out = csv.writer(f)
    csv_out.writerow(['Team', 'Opponent', 'Wins', 'Losses'])

    # For each team in the AL table, identifies the team's name, the opponent,
    # and their wins and losses (WL) against that opponent. Then outputs the
    # results to the open CSV file
    for team_row in AL_rows:
        team = team_row.find('b').text

        # A cell has the following form:
        # <td align="right">
        # 7-9</td>
        WL_cells = team_row.find_all('td', align="right")

        # Extracts the values for both wins and losses from each WL table
        # cell; strip() removes the leading newline and any other padding.
        wins_losses = [td_cell.text.strip().split('-') for td_cell in WL_cells]

        # A row with fewer cells than opponents cannot be paired reliably;
        # report it explicitly rather than failing with an IndexError after
        # part of the row has already been written.
        if len(wins_losses) < len(AL_teams):
            raise RuntimeError(
                'Expected at least %d win-loss cells for %s, found %d'
                % (len(AL_teams), team, len(wins_losses)))

        # zip() stops after the last opponent, so any extra trailing cells in
        # the row are ignored. A team's own column is skipped.
        for opponent, record in zip(AL_teams, wins_losses):
            if opponent != team:
                csv_out.writerow([team, opponent, record[0], record[1]])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment