Last active
April 14, 2020 14:20
-
-
Save Denniskamau/a45e7570f6ac64bbd74739cb61766208 to your computer and use it in GitHub Desktop.
initialize scrapper class
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Scrapper:
    """Scrape a product-listing page and hand the book entries to the scraper."""

    def initializeScrapping(self, url):
        """Fetch ``url``, locate the book entries and start scraping them.

        The page is downloaded with ``urllib`` and parsed with BeautifulSoup's
        ``html.parser``.  The DOM is then walked in this order:

        ``div.prod-list-view`` -> first ``section`` ->
        ``ol`` (class ``"product-list row"``) ->
        every ``li`` (class ``"col-xs-6 col-md-3"``).

        The number of matched ``li`` elements and the elements themselves are
        passed to ``self.startScrapping``.

        NOTE(review): ``startScrapping`` is not defined in the visible part of
        this class; it is assumed to be provided elsewhere -- confirm.

        Args:
            url: Address of the page to scrape.

        Raises:
            AttributeError: If one of the intermediate containers is missing
                from the page (``find`` yields ``None`` in that case).
        """
        # Parse inside the ``with`` block so the HTTP response is always
        # closed, even if parsing raises; the original left it open.
        with urllib.request.urlopen(url) as page:
            soup = BeautifulSoup(page, 'html.parser')

        # Get the page data from the div with a class of product list view.
        product_list = soup.find('div', class_="prod-list-view")

        # Traverse the DOM down to the individual book entries.
        items = product_list.find('section')
        book_list = items.find('ol', class_="product-list row")
        book_data = book_list.find_all('li', class_='col-xs-6 col-md-3')
        number_of_books = len(book_data)

        # Call the scraping method with the collected entries.
        self.startScrapping(number_of_books, book_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment