terryjbates · January 19, 2025 11:14
diff --git a/parse_html_table.py b/parse_html_table.py
 def parse_html_table(file_path):
     """Load the first ``<table>`` of an HTML file into a DataFrame.

     The file is decoded as UTF-8 and bytes that cannot be decoded are
     dropped (``errors='ignore'``). Column names are the text of every
     ``<th>`` found inside the table; each ``<tr>`` after the first
     contributes one row made of the text of its ``<td>`` cells.

     Args:
         file_path: Path of the HTML file to read.

     Returns:
         A ``pandas.DataFrame`` with one row per data ``<tr>``.

     Raises:
         ValueError: If the document contains no ``<table>`` element.
     """
     # Built-in open() replaces the legacy codecs.open(); newline='' keeps
     # line endings untranslated, which is what codecs.open() handed to the
     # parser.
     with open(file_path, 'r', encoding='utf-8', errors='ignore', newline='') as f:
         soup = BeautifulSoup(f, 'html.parser')

     table = soup.find('table')  # Adjust the selector if needed
     if table is None:
         # Fail with a clear message instead of an AttributeError on None.
         raise ValueError(
             "No <table> element found in {!r}".format(file_path)
         )

     headers = [th.text for th in table.find_all('th')]

     # The first <tr> is assumed to be the header row and is skipped.
     data = [
         [td.text for td in row.find_all('td')]
         for row in table.find_all('tr')[1:]
     ]

     return pd.DataFrame(data, columns=headers)
	def parse_html_table(file_path):
		"""Load the first ``<table>`` of an HTML file into a DataFrame.

		The file is decoded as UTF-8 and bytes that cannot be decoded are
		dropped (``errors='ignore'``). Column names are the text of every
		``<th>`` found inside the table; each ``<tr>`` after the first
		contributes one row made of the text of its ``<td>`` cells.

		Args:
			file_path: Path of the HTML file to read.

		Returns:
			A ``pandas.DataFrame`` with one row per data ``<tr>``.

		Raises:
			ValueError: If the document contains no ``<table>`` element.
		"""
		# Built-in open() replaces the legacy codecs.open(); newline='' keeps
		# line endings untranslated, which is what codecs.open() handed to the
		# parser.
		with open(file_path, 'r', encoding='utf-8', errors='ignore', newline='') as f:
			soup = BeautifulSoup(f, 'html.parser')

		table = soup.find('table')  # Adjust the selector if needed
		if table is None:
			# Fail with a clear message instead of an AttributeError on None.
			raise ValueError(
				"No <table> element found in {!r}".format(file_path)
			)

		headers = [th.text for th in table.find_all('th')]

		# The first <tr> is assumed to be the header row and is skipped.
		data = [
			[td.text for td in row.find_all('td')]
			for row in table.find_all('tr')[1:]
		]

		return pd.DataFrame(data, columns=headers)