Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save gabecano4308/91727d7c419a463292f3322da5a6c286 to your computer and use it in GitHub Desktop.
Save gabecano4308/91727d7c419a463292f3322da5a6c286 to your computer and use it in GitHub Desktop.
for part 2
# Placeholder stored whenever a biographical detail cannot be found on a player page.
_NOT_LISTED = 'Not Listed'


def _first_group(pattern, text):
    """Return the stripped first capture group of *pattern* in *text*.

    Falls back to 'Not Listed' when the pattern does not match, so one
    player page with an unexpected layout cannot abort the whole scrape.
    """
    match = re.search(pattern, text)
    return match.group(1).strip() if match else _NOT_LISTED


def _player_bio(player_info):
    """Build the Twitter/height/weight/position fields from a player's info div.

    *player_info* is the BeautifulSoup tag for the player's
    ``schema.org/Person`` div, or ``None`` when the page had no such div
    (in which case every field is 'Not Listed').
    """
    bio = {
        'Twitter Handle': _NOT_LISTED,
        'Height': _NOT_LISTED,
        'Weight (Lbs)': _NOT_LISTED,
        'Position': _NOT_LISTED,
    }
    if player_info is None:
        return bio

    # Scan every link instead of assuming the Twitter link is the second one;
    # anchors without an href are skipped.
    for link in player_info.find_all('a'):
        href = link.get('href')
        if href and 'twitter' in href:
            bio['Twitter Handle'] = href.replace('https://twitter.com/', '')
            break

    # The regexes below run against the string form of all <p> tags in the div.
    s = str(player_info.find_all('p'))
    bio['Height'] = _first_group('\"height\">(.*)</span>,\xa0<span itemprop="weight', s)
    bio['Weight (Lbs)'] = _first_group('\"weight\">(.*)lb</span>', s)
    bio['Position'] = _first_group('Position:\n </strong>\n (.*)\n\n', s)
    return bio


def get_stats(year, *, timeout=30):
    """Scrape per-game stats and bio details for every player of every team.

    For each entry of the module-level ``team_list`` the team's page for
    *year* is downloaded, its ``per_game`` table is read row by row, and each
    player's own page is downloaded to collect Twitter handle, height, weight
    and position.

    Args:
        year: Season identifier inserted into the team page URL.
        timeout: Seconds passed to every ``requests.get`` call so a stalled
            connection cannot hang the scrape indefinitely.

    Returns:
        A pandas DataFrame with one row per player. All values are the text
        scraped from the pages; missing stat cells are empty strings and
        missing bio details are 'Not Listed'.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    base_url = 'https://www.basketball-reference.com'
    # (output column, value of the cell's 'data-stat' attribute), in output order.
    stat_columns = (
        ('Age', 'age'),
        ('Min PG', 'mp_per_g'),
        ('Field Goal %', 'fg_pct'),
        ('Rebounds PG', 'trb_per_g'),
        ('Assists PG', 'ast_per_g'),
        ('Steals PG', 'stl_per_g'),
        ('Blocks PG', 'blk_per_g'),
        ('Turnovers PG', 'tov_per_g'),
        ('Points PG', 'pts_per_g'),
    )

    # Creating a list of dictionaries to then convert into a Pandas Dataframe
    nba_info = []

    # Iteratively finding the URL page for each NBA team according to the
    # 'year' parameter and instantiating a BeautifulSoup object
    for i in team_list:
        team_res = requests.get(f'{base_url}{i}/{year}.html', timeout=timeout)
        team_soup = BeautifulSoup(team_res.content, 'lxml')
        per_game = team_soup.find(name='table', attrs={'id': 'per_game'})
        if per_game is None:
            # No per-game table on this page (e.g. the page was not found):
            # skip the team rather than crash the whole scrape.
            continue

        team = i[-3:]
        # Excluding the first 'tr', since that's the table's title head
        for row in per_game.find_all('tr')[1:]:
            name_link = row.find('a')
            if name_link is None:
                # Rows without a player link carry no player to record.
                continue

            player = {'Name': name_link.text.strip(), 'Team': team}
            for column, data_stat in stat_columns:
                cell = row.find('td', {'data-stat': data_stat})
                player[column] = cell.text if cell is not None else ''

            player_info = None
            href = name_link.get('href')
            if href:
                # urljoin avoids the double slash that plain concatenation of
                # the site root and a root-relative href would produce.
                player_res = requests.get(urljoin(base_url + '/', href), timeout=timeout)
                player_soup = BeautifulSoup(player_res.content, 'lxml')
                player_info = player_soup.find(
                    name='div', attrs={'itemtype': 'https://schema.org/Person'}
                )
            player.update(_player_bio(player_info))

            nba_info.append(player)

    return pd.DataFrame(nba_info)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment