Created
January 7, 2021 03:41
-
-
Save gabecano4308/91727d7c419a463292f3322da5a6c286 to your computer and use it in GitHub Desktop.
for part 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Site root; team paths and player hrefs are appended to it.
_BASE_URL = 'https://www.basketball-reference.com'

# Placeholder stored when a biographical field cannot be found on the page.
_NOT_LISTED = 'Not Listed'

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the whole scrape.
_REQUEST_TIMEOUT = 30

# (output column name, 'data-stat' attribute of the <td> holding the value)
_STAT_COLUMNS = (
    ('Age', 'age'),
    ('Min PG', 'mp_per_g'),
    ('Field Goal %', 'fg_pct'),
    ('Rebounds PG', 'trb_per_g'),
    ('Assists PG', 'ast_per_g'),
    ('Steals PG', 'stl_per_g'),
    ('Blocks PG', 'blk_per_g'),
    ('Turnovers PG', 'tov_per_g'),
    ('Points PG', 'pts_per_g'),
)

# Patterns applied to the stringified <p> tags of the player's bio block.
# Kept as raw strings; they are matched with re.search at call time.
_HEIGHT_PATTERN = r'"height">(.*)</span>,\xa0<span itemprop="weight'
_WEIGHT_PATTERN = r'"weight">(.*)lb</span>'
_POSITION_PATTERN = r'Position:\n </strong>\n (.*)\n\n'


def _fetch_soup(url):
    """Download *url* and return its content parsed with the lxml parser."""
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, 'lxml')


def _cell_text(row, stat):
    """Return the text of the row's <td data-stat=stat>, or '' if absent."""
    cell = row.find('td', {'data-stat': stat})
    return cell.text if cell is not None else ''


def _first_group(pattern, text):
    """Return the stripped first capture group, or the placeholder on no match."""
    match = re.search(pattern, text)
    return match.group(1).strip() if match else _NOT_LISTED


def _player_bio(player_url):
    """Scrape Twitter handle, height, weight and position from a player page.

    Every field defaults to 'Not Listed' when the bio block, the link or the
    regex match is missing, so one odd page cannot abort the whole scrape.
    """
    bio = {
        'Twitter Handle': _NOT_LISTED,
        'Height': _NOT_LISTED,
        'Weight (Lbs)': _NOT_LISTED,
        'Position': _NOT_LISTED,
    }
    player_info = _fetch_soup(player_url).find(
        name='div', attrs={'itemtype': 'https://schema.org/Person'})
    if player_info is None:
        return bio

    # The Twitter link, when present, is expected to be the second link in
    # the bio block; guard against bios that have fewer links than that.
    links = [anchor.get('href') for anchor in player_info.find_all('a')]
    if len(links) > 1 and links[1] and 'twitter' in links[1]:
        bio['Twitter Handle'] = links[1].replace('https://twitter.com/', '')

    paragraphs = str(player_info.find_all('p'))
    bio['Height'] = _first_group(_HEIGHT_PATTERN, paragraphs)
    bio['Weight (Lbs)'] = _first_group(_WEIGHT_PATTERN, paragraphs)
    bio['Position'] = _first_group(_POSITION_PATTERN, paragraphs)
    return bio


def get_stats(year):
    """Scrape per-game stats and bio details for every player of every team.

    For each entry of the module-level ``team_list`` the team page for
    *year* is downloaded, its 'per_game' table is read row by row, and each
    player's own page is fetched for Twitter handle, height, weight and
    position.

    Args:
        year: Season used to build the team page URL (``.../<year>.html``).

    Returns:
        A pandas DataFrame with one row per player and the columns Name,
        Team, Age, Min PG, Field Goal %, Rebounds PG, Assists PG, Steals PG,
        Blocks PG, Turnovers PG, Points PG, Twitter Handle, Height,
        Weight (Lbs) and Position. All values are strings as scraped.

    Teams whose page has no 'per_game' table are skipped with a warning;
    table rows without a player link are skipped silently.
    """
    import warnings

    # List of per-player dictionaries, converted to a DataFrame at the end.
    nba_info = []
    for team_path in team_list:
        team_url = f'{_BASE_URL}{team_path}/{year}.html'
        per_game = _fetch_soup(team_url).find(
            name='table', attrs={'id': 'per_game'})
        if per_game is None:
            warnings.warn(
                f"No 'per_game' table found at {team_url}; skipping team",
                stacklevel=2)
            continue

        team = team_path[-3:]  # last three characters of the team path
        # Skip the first 'tr', since that is the table's header row.
        for row in per_game.find_all('tr')[1:]:
            link = row.find('a', href=True)
            if link is None:
                continue  # not a player row

            player = {'Name': link.text.strip(), 'Team': team}
            for column, stat in _STAT_COLUMNS:
                player[column] = _cell_text(row, stat)

            # Strip any leading slash from the href so the URL never
            # contains a double slash after the host name.
            href = link['href'].lstrip('/')
            player.update(_player_bio(f'{_BASE_URL}/{href}'))
            nba_info.append(player)

    return pd.DataFrame(nba_info)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment