gabecano4308 · December 31, 2020 19:09
diff --git a/bball ref -- combined df b/bball ref -- combined df
 # Build a table of Washington Wizards players: per-game stats come from the
 # team page, and Twitter handle / height / weight / position come from each
 # player's own page.

 # Local import so this script brings its own new dependency into scope.
 from urllib.parse import urljoin

 # Seconds to wait on any single HTTP request, so a stalled connection cannot
 # hang the scrape indefinitely.
 request_timeout = 30

 # URL for the Washington Wizards Basketball Reference page
 wiz_url = 'https://www.basketball-reference.com/teams/WAS/2021.html'

 # The requests library sends a GET request to wiz_url. raise_for_status()
 # turns an HTTP error response into an exception here, instead of letting an
 # error page be parsed as if it were the team page.
 wiz_res = requests.get(wiz_url, timeout=request_timeout)
 wiz_res.raise_for_status()

 # BeautifulSoup parses the content of an HTML document, in this case wiz_res
 wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')

 # .find() searches for a tag with the given attributes and returns the first
 # match, or None when nothing matches.
 wiz_per_game = wiz_soup.find(name='table', attrs={'id': 'per_game'})
 if wiz_per_game is None:
     raise ValueError(f"No table with id 'per_game' found at {wiz_url}")

 # Output column name -> value of the table cell's 'data-stat' attribute
 stat_columns = {
     'Age': 'age',
     'Min PG': 'mp_per_g',
     'Field Goal %': 'fg_pct',
     'Rebounds PG': 'trb_per_g',
     'Assists PG': 'ast_per_g',
     'Steals PG': 'stl_per_g',
     'Blocks PG': 'blk_per_g',
     'Turnovers PG': 'tov_per_g',
     'Points PG': 'pts_per_g',
 }

 # Output column name -> pattern whose first group captures that field from
 # the stringified <p> tags of a player's info box. Compiled once, outside the
 # loop. The groups are non-greedy so they stop at the first closing marker,
 # and the position pattern tolerates any amount of whitespace around
 # </strong> rather than an exact number of spaces.
 bio_patterns = {
     'Height': re.compile(r'"height">(.*?)</span>,\xa0<span itemprop="weight'),
     'Weight (Lbs)': re.compile(r'"weight">(.*?)lb</span>'),
     'Position': re.compile(r'Position:\s*</strong>\s*([^\n<]+)'),
 }

 # Making a list of dictionaries to then convert into a pd.DataFrame
 wiz_info = []
 for row in wiz_per_game.find_all('tr')[1:]:  # Excluding the first 'tr', since that's the table's title head

     name_link = row.find('a')
     if name_link is None or not name_link.get('href'):
         # Without a player link there is no name and no player page to follow
         continue

     player = {'Name': name_link.text.strip()}
     for column, stat in stat_columns.items():
         player[column] = row.find('td', {'data-stat': stat}).text

     # urljoin resolves the link against the team page URL, which avoids the
     # doubled slash that plain string concatenation produces for an href
     # that already starts with '/'.
     player_url = urljoin(wiz_url, name_link['href'])
     player_rest = requests.get(player_url, timeout=request_timeout)
     player_rest.raise_for_status()
     player_soup = BeautifulSoup(player_rest.content, 'lxml')
     player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

     if player_info is None:
         # No info box on the page: every biography field falls back to
         # 'Not Listed' below instead of aborting the whole scrape.
         player_links = []
         s = ''
     else:
         player_links = [link.get('href') for link in player_info.find_all('a')]
         s = str(player_info.find_all('p'))

     # Look through every link in the info box rather than assuming the
     # Twitter link sits at a fixed position; anchors without an href are
     # skipped.
     twitter_url = next(
         (href for href in player_links if href and 'twitter.com/' in href),
         None,
     )
     if twitter_url is None:
         player['Twitter Handle'] = 'Not Listed'
     else:
         # The handle is the last path segment of the profile URL
         player['Twitter Handle'] = twitter_url.rstrip('/').rsplit('/', 1)[-1]

     for column, pattern in bio_patterns.items():
         match = pattern.search(s)
         player[column] = match.group(1).strip() if match else 'Not Listed'

     wiz_info.append(player)

 # Final expression: the DataFrame built from the per-player dictionaries
 pd.DataFrame(wiz_info)
	# Build a table of Washington Wizards players: per-game stats come from the
	# team page, and Twitter handle / height / weight / position come from each
	# player's own page.

	# Local import so this script brings its own new dependency into scope.
	from urllib.parse import urljoin

	# Seconds to wait on any single HTTP request, so a stalled connection cannot
	# hang the scrape indefinitely.
	request_timeout = 30

	# URL for the Washington Wizards Basketball Reference page
	wiz_url = 'https://www.basketball-reference.com/teams/WAS/2021.html'

	# The requests library sends a GET request to wiz_url. raise_for_status()
	# turns an HTTP error response into an exception here, instead of letting an
	# error page be parsed as if it were the team page.
	wiz_res = requests.get(wiz_url, timeout=request_timeout)
	wiz_res.raise_for_status()

	# BeautifulSoup parses the content of an HTML document, in this case wiz_res
	wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')

	# .find() searches for a tag with the given attributes and returns the first
	# match, or None when nothing matches.
	wiz_per_game = wiz_soup.find(name='table', attrs={'id': 'per_game'})
	if wiz_per_game is None:
		raise ValueError(f"No table with id 'per_game' found at {wiz_url}")

	# Output column name -> value of the table cell's 'data-stat' attribute
	stat_columns = {
		'Age': 'age',
		'Min PG': 'mp_per_g',
		'Field Goal %': 'fg_pct',
		'Rebounds PG': 'trb_per_g',
		'Assists PG': 'ast_per_g',
		'Steals PG': 'stl_per_g',
		'Blocks PG': 'blk_per_g',
		'Turnovers PG': 'tov_per_g',
		'Points PG': 'pts_per_g',
	}

	# Output column name -> pattern whose first group captures that field from
	# the stringified <p> tags of a player's info box. Compiled once, outside the
	# loop. The groups are non-greedy so they stop at the first closing marker,
	# and the position pattern tolerates any amount of whitespace around
	# </strong> rather than an exact number of spaces.
	bio_patterns = {
		'Height': re.compile(r'"height">(.*?)</span>,\xa0<span itemprop="weight'),
		'Weight (Lbs)': re.compile(r'"weight">(.*?)lb</span>'),
		'Position': re.compile(r'Position:\s*</strong>\s*([^\n<]+)'),
	}

	# Making a list of dictionaries to then convert into a pd.DataFrame
	wiz_info = []
	for row in wiz_per_game.find_all('tr')[1:]:  # Excluding the first 'tr', since that's the table's title head

		name_link = row.find('a')
		if name_link is None or not name_link.get('href'):
			# Without a player link there is no name and no player page to follow
			continue

		player = {'Name': name_link.text.strip()}
		for column, stat in stat_columns.items():
			player[column] = row.find('td', {'data-stat': stat}).text

		# urljoin resolves the link against the team page URL, which avoids the
		# doubled slash that plain string concatenation produces for an href
		# that already starts with '/'.
		player_url = urljoin(wiz_url, name_link['href'])
		player_rest = requests.get(player_url, timeout=request_timeout)
		player_rest.raise_for_status()
		player_soup = BeautifulSoup(player_rest.content, 'lxml')
		player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

		if player_info is None:
			# No info box on the page: every biography field falls back to
			# 'Not Listed' below instead of aborting the whole scrape.
			player_links = []
			s = ''
		else:
			player_links = [link.get('href') for link in player_info.find_all('a')]
			s = str(player_info.find_all('p'))

		# Look through every link in the info box rather than assuming the
		# Twitter link sits at a fixed position; anchors without an href are
		# skipped.
		twitter_url = next(
			(href for href in player_links if href and 'twitter.com/' in href),
			None,
		)
		if twitter_url is None:
			player['Twitter Handle'] = 'Not Listed'
		else:
			# The handle is the last path segment of the profile URL
			player['Twitter Handle'] = twitter_url.rstrip('/').rsplit('/', 1)[-1]

		for column, pattern in bio_patterns.items():
			match = pattern.search(s)
			player[column] = match.group(1).strip() if match else 'Not Listed'

		wiz_info.append(player)

	# Final expression: the DataFrame built from the per-player dictionaries
	pd.DataFrame(wiz_info)