Created
May 19, 2016 14:51
-
-
Save krishashok/8b4013d37cba26f152216f1680b48b8d to your computer and use it in GitHub Desktop.
Extract election results for 2016 state elections from the ECI website into a spreadsheet.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
from xlwt.Workbook import * | |
from xlwt import easyxf,Formula | |
# Create the workbook that will hold the scraped results.
wb = Workbook()
# Everything is written to a single worksheet named '0'.
ws = wb.add_sheet('0')

# Column widths for columns 0-4, given in characters and scaled by 256
# (xlwt's width unit). Column 2 is twice as wide as the others; columns
# 5-7 keep the default width.
for col_index, width_in_chars in enumerate((30, 30, 60, 30, 30)):
    ws.col(col_index).width = 256 * width_in_chars

# Cell styles: underlined blue link text, bold heading on a solid yellow
# fill, and wrapped body text. All use a font height of 280.
style_link = easyxf(
    'font: underline single, name Arial, height 280, colour_index blue'
)
style_heading = easyxf(
    'font: bold 1, name Arial, height 280; '
    'pattern: pattern solid, pattern_fore_colour yellow, '
    'pattern_back_colour yellow'
)
style_wrap = easyxf('align: wrap 1; font: height 280')

# Heading row (row 0), one title per column, written left to right.
for col_index, title in enumerate((
    'Constituency',
    'Const. No.',
    'Leading Candidate',
    'Leading Party',
    'Trailing Candidate',
    'Trailing Party',
    'Margin',
    'Status',
)):
    ws.write(0, col_index, title, style_heading)
# The ECI site splits the Tamil Nadu results over several pages. The first
# page is <url_pattern>.htm; later pages insert the page index before the
# extension (StatewiseS221.htm, StatewiseS222.htm, ...).
# For another state, set url_pattern to that state's first-page URL without
# the trailing ".htm".
url_pattern = 'http://eciresults.nic.in/StatewiseS22'
# Tamil Nadu has 24 result pages; change this for a state with a different
# number of pages.
PAGE_COUNT = 24
# A result row has one <td> per spreadsheet column (eight headings).
EXPECTED_CELLS = 8
# Seconds to wait for the server. Without a timeout requests can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Row 0 holds the headings, so scraped data starts at row 1.
row_count = 1
for i in range(PAGE_COUNT):
    # Build the URL for the current page (the first page has no index).
    if i == 0:
        url = url_pattern + ".htm"
    else:
        url = url_pattern + str(i) + ".htm"

    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page as if
    # it contained results.
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "lxml")
    # The results live in the first table on the page.
    table = soup.find('table')
    if table is None:
        # No table on this page: nothing to record, move on to the next one.
        continue

    for row in table.find_all('tr'):
        # Count the row's own <td> cells rather than len(row): len() of a
        # tag counts every child node, including whitespace text and <th>
        # cells, so it depends on how the HTML happens to be formatted.
        cells = row.find_all('td', recursive=False)
        if len(cells) != EXPECTED_CELLS:
            # Header, layout or otherwise non-result row.
            continue
        for col_count, cell in enumerate(cells):
            ws.write(row_count, col_count, cell.text, style_wrap)
        row_count = row_count + 1

# Write the collected results to disk as an .xls file.
wb.save('election_results.xls')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment