gabecano4308 · January 7, 2021 03:41
diff --git a/bball ref -- get season stats (all teams) b/bball ref -- get season stats (all teams)
 def get_stats(year):
     """Scrape per-game player stats for every team in ``team_list``.

     For each entry of the module-level ``team_list`` the team's season page
     on basketball-reference.com is downloaded and its ``per_game`` table is
     read row by row. Every player's own page is then fetched to pick up the
     Twitter handle, height, weight and position.

     Args:
         year: Season identifier interpolated into the team page URL
             (``<base><team path>/<year>.html``).

     Returns:
         pandas.DataFrame with one row per player row found and the columns
         ``Name``, ``Team``, ``Age``, ``Min PG``, ``Field Goal %``,
         ``Rebounds PG``, ``Assists PG``, ``Steals PG``, ``Blocks PG``,
         ``Turnovers PG``, ``Points PG``, ``Twitter Handle``, ``Height``,
         ``Weight (Lbs)`` and ``Position``. All values are the strings taken
         from the pages; a biographical field that cannot be located is set
         to ``'Not Listed'``.

     Notes:
         Teams whose page has no ``per_game`` table are skipped with a
         warning instead of aborting the whole run. Network errors raised by
         ``requests`` (including timeouts) propagate to the caller.
     """
     import warnings

     base_url = 'https://www.basketball-reference.com'
     not_listed = 'Not Listed'
     # Without an explicit timeout a stalled connection blocks forever.
     timeout_seconds = 30

     # Output column -> value of the cell's `data-stat` attribute.
     stat_columns = {
         'Age': 'age',
         'Min PG': 'mp_per_g',
         'Field Goal %': 'fg_pct',
         'Rebounds PG': 'trb_per_g',
         'Assists PG': 'ast_per_g',
         'Steals PG': 'stl_per_g',
         'Blocks PG': 'blk_per_g',
         'Turnovers PG': 'tov_per_g',
         'Points PG': 'pts_per_g',
     }

     # Compiled once here rather than once per player row.
     weight_re = re.compile('\"weight\">(.*)lb</span>')
     position_re = re.compile('Position:\n  </strong>\n (.*)\n\n')
     height_re = re.compile('\"height\">(.*)</span>,\xa0<span itemprop="weight')

     def first_group(pattern, text):
         # A failed search returns None; fall back instead of crashing.
         match = pattern.search(text)
         return match.group(1).strip() if match else not_listed

     # List of per-player dictionaries, converted to a DataFrame at the end.
     nba_info = []

     for team_path in team_list:
         team_url = f'{base_url}{team_path}/{year}.html'
         team_res = requests.get(team_url, timeout=timeout_seconds)
         team_soup = BeautifulSoup(team_res.content, 'lxml')
         per_game = team_soup.find(name='table', attrs={'id': 'per_game'})
         if per_game is None:
             warnings.warn(
                 f'No per_game table found at {team_url}; skipping team'
             )
             continue

         # The first 'tr' is the table's title head, so it is excluded.
         for row in per_game.find_all('tr')[1:]:
             anchor = row.find('a')
             if anchor is None:
                 # Not a player row (no player link) - nothing to record.
                 continue

             player = {
                 'Name': anchor.text.strip(),
                 'Team': team_path[-3:],
             }
             for column, stat in stat_columns.items():
                 player[column] = row.find('td', {'data-stat': stat}).text

             # lstrip avoids a double slash when the href is root-relative.
             player_url = base_url + '/' + anchor.attrs['href'].lstrip('/')
             player_res = requests.get(player_url, timeout=timeout_seconds)
             player_soup = BeautifulSoup(player_res.content, 'lxml')
             player_info = player_soup.find(
                 name='div', attrs={'itemtype': 'https://schema.org/Person'}
             )

             if player_info is None:
                 player_links = []
                 paragraphs = ''
             else:
                 player_links = [
                     link.get('href') for link in player_info.find_all('a')
                 ]
                 paragraphs = str(player_info.find_all('p'))

             # The Twitter link, when present, is expected as the second link.
             second_link = player_links[1] if len(player_links) > 1 else None
             if second_link and 'twitter' in second_link:
                 player['Twitter Handle'] = second_link.replace(
                     'https://twitter.com/', ''
                 )
             else:
                 player['Twitter Handle'] = not_listed

             player['Height'] = first_group(height_re, paragraphs)
             player['Weight (Lbs)'] = first_group(weight_re, paragraphs)
             player['Position'] = first_group(position_re, paragraphs)

             nba_info.append(player)

     return pd.DataFrame(nba_info)
	def get_stats(year):
	    """Scrape per-game player stats for every team in ``team_list``.

	    For each entry of the module-level ``team_list`` the team's season page
	    on basketball-reference.com is downloaded and its ``per_game`` table is
	    read row by row. Every player's own page is then fetched to pick up the
	    Twitter handle, height, weight and position.

	    Args:
	        year: Season identifier interpolated into the team page URL
	            (``<base><team path>/<year>.html``).

	    Returns:
	        pandas.DataFrame with one row per player row found and the columns
	        ``Name``, ``Team``, ``Age``, ``Min PG``, ``Field Goal %``,
	        ``Rebounds PG``, ``Assists PG``, ``Steals PG``, ``Blocks PG``,
	        ``Turnovers PG``, ``Points PG``, ``Twitter Handle``, ``Height``,
	        ``Weight (Lbs)`` and ``Position``. All values are the strings taken
	        from the pages; a biographical field that cannot be located is set
	        to ``'Not Listed'``.

	    Notes:
	        Teams whose page has no ``per_game`` table are skipped with a
	        warning instead of aborting the whole run. Network errors raised by
	        ``requests`` (including timeouts) propagate to the caller.
	    """
	    import warnings

	    base_url = 'https://www.basketball-reference.com'
	    not_listed = 'Not Listed'
	    # Without an explicit timeout a stalled connection blocks forever.
	    timeout_seconds = 30

	    # Output column -> value of the cell's `data-stat` attribute.
	    stat_columns = {
	        'Age': 'age',
	        'Min PG': 'mp_per_g',
	        'Field Goal %': 'fg_pct',
	        'Rebounds PG': 'trb_per_g',
	        'Assists PG': 'ast_per_g',
	        'Steals PG': 'stl_per_g',
	        'Blocks PG': 'blk_per_g',
	        'Turnovers PG': 'tov_per_g',
	        'Points PG': 'pts_per_g',
	    }

	    # Compiled once here rather than once per player row.
	    weight_re = re.compile('\"weight\">(.*)lb</span>')
	    # Two spaces precede '</strong>'; a collapsed single space would
	    # not match the page markup this pattern was written against.
	    position_re = re.compile('Position:\n  </strong>\n (.*)\n\n')
	    height_re = re.compile('\"height\">(.*)</span>,\xa0<span itemprop="weight')

	    def first_group(pattern, text):
	        # A failed search returns None; fall back instead of crashing.
	        match = pattern.search(text)
	        return match.group(1).strip() if match else not_listed

	    # List of per-player dictionaries, converted to a DataFrame at the end.
	    nba_info = []

	    for team_path in team_list:
	        team_url = f'{base_url}{team_path}/{year}.html'
	        team_res = requests.get(team_url, timeout=timeout_seconds)
	        team_soup = BeautifulSoup(team_res.content, 'lxml')
	        per_game = team_soup.find(name='table', attrs={'id': 'per_game'})
	        if per_game is None:
	            warnings.warn(
	                f'No per_game table found at {team_url}; skipping team'
	            )
	            continue

	        # The first 'tr' is the table's title head, so it is excluded.
	        for row in per_game.find_all('tr')[1:]:
	            anchor = row.find('a')
	            if anchor is None:
	                # Not a player row (no player link) - nothing to record.
	                continue

	            player = {
	                'Name': anchor.text.strip(),
	                'Team': team_path[-3:],
	            }
	            for column, stat in stat_columns.items():
	                player[column] = row.find('td', {'data-stat': stat}).text

	            # lstrip avoids a double slash when the href is root-relative.
	            player_url = base_url + '/' + anchor.attrs['href'].lstrip('/')
	            player_res = requests.get(player_url, timeout=timeout_seconds)
	            player_soup = BeautifulSoup(player_res.content, 'lxml')
	            player_info = player_soup.find(
	                name='div', attrs={'itemtype': 'https://schema.org/Person'}
	            )

	            if player_info is None:
	                player_links = []
	                paragraphs = ''
	            else:
	                player_links = [
	                    link.get('href') for link in player_info.find_all('a')
	                ]
	                paragraphs = str(player_info.find_all('p'))

	            # The Twitter link, when present, is expected as the second link.
	            second_link = player_links[1] if len(player_links) > 1 else None
	            if second_link and 'twitter' in second_link:
	                player['Twitter Handle'] = second_link.replace(
	                    'https://twitter.com/', ''
	                )
	            else:
	                player['Twitter Handle'] = not_listed

	            player['Height'] = first_group(height_re, paragraphs)
	            player['Weight (Lbs)'] = first_group(weight_re, paragraphs)
	            player['Position'] = first_group(position_re, paragraphs)

	            nba_info.append(player)

	    return pd.DataFrame(nba_info)