melodykramer · December 22, 2017 20:57
diff --git a/scraping 2018 nieman predictions b/scraping 2018 nieman predictions
 ##libraries

 import urllib2
 from bs4 import BeautifulSoup
 import csv


 import sys
 reload(sys)
 sys.setdefaultencoding('utf8')

 ## getting the url we want to scrape

 nieman_page = 'http://www.niemanlab.org/2017/12/the-rise-of-skeptical-reading/'


 ## helper: text of the LAST element on the page carrying the given CSS class.
 ## Returns an empty string when no element matches, so a missing field ends up
 ## as an empty csv cell instead of an undefined name at write time.

 def last_text(soup, css_class):
     matches = soup.findAll(attrs={'class' : css_class})
     if not matches:
         return ''
     return matches[-1].text


 ## querying the page and returning the html to the variable page
 ## (the response is closed explicitly once it has been read)

 response = urllib2.urlopen(nieman_page)
 try:
     page = response.read()
 finally:
     response.close()

 ## parsing html using BeautifulSoup and storing html in variable soup

 soup = BeautifulSoup(page, 'html.parser')

 ## headline, blurb and byline: one value each, repeated on every csv row

 headline = last_text(soup, 'simple-headline')
 blurb = last_text(soup, 'simple-post-deck')
 author = last_text(soup, 'predix-byline')

 ## writes to csv: a header row, then one row per 'predix-storybody' element
 ## (prediction text + author bio).
 ## - "wb": the Python 2 csv module expects a binary-mode file; text mode adds
 ##   an extra \r to every line ending on Windows.
 ## - the with-block closes (and flushes) the file even if a row fails to write.

 with open("Predictions.csv", "wb") as csv_file:
     writer = csv.writer(csv_file)
     writer.writerow(["Headline", "Blurb", "Byline", "Text"])
     for prediction in soup.findAll(attrs={'class' : 'predix-storybody'}):
         writer.writerow([headline, blurb, author, prediction.text])
	##libraries

	import urllib2
	from bs4 import BeautifulSoup
	import csv


	import sys
	reload(sys)
	sys.setdefaultencoding('utf8')

	## getting the url we want to scrape

	nieman_page = 'http://www.niemanlab.org/2017/12/the-rise-of-skeptical-reading/'


	## helper: text of the LAST element on the page carrying the given CSS class.
	## Returns an empty string when no element matches, so a missing field ends up
	## as an empty csv cell instead of an undefined name at write time.

	def last_text(soup, css_class):
	    matches = soup.findAll(attrs={'class' : css_class})
	    if not matches:
	        return ''
	    return matches[-1].text


	## querying the page and returning the html to the variable page
	## (the response is closed explicitly once it has been read)

	response = urllib2.urlopen(nieman_page)
	try:
	    page = response.read()
	finally:
	    response.close()

	## parsing html using BeautifulSoup and storing html in variable soup

	soup = BeautifulSoup(page, 'html.parser')

	## headline, blurb and byline: one value each, repeated on every csv row

	headline = last_text(soup, 'simple-headline')
	blurb = last_text(soup, 'simple-post-deck')
	author = last_text(soup, 'predix-byline')

	## writes to csv: a header row, then one row per 'predix-storybody' element
	## (prediction text + author bio).
	## - "wb": the Python 2 csv module expects a binary-mode file; text mode adds
	##   an extra \r to every line ending on Windows.
	## - the with-block closes (and flushes) the file even if a row fails to write.

	with open("Predictions.csv", "wb") as csv_file:
	    writer = csv.writer(csv_file)
	    writer.writerow(["Headline", "Blurb", "Byline", "Text"])
	    for prediction in soup.findAll(attrs={'class' : 'predix-storybody'}):
	        writer.writerow([headline, blurb, author, prediction.text])