Getting started with web scraping for image data: an example
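Before the full script, here is a minimal sketch of the pattern it relies on: open a page with urllib, parse the HTML with BeautifulSoup, and walk the parsed tree to pull out the elements you care about. The URL below is a placeholder for illustration, not the target site used in the gist.

import urllib.request
from bs4 import BeautifulSoup

# Placeholder URL: substitute a page you are allowed to scrape.
page = urllib.request.urlopen("https://example.com/")
soup = BeautifulSoup(page, "html.parser")

# List every image source on the page; real scrapers normally narrow this
# down with class or id selectors, as the full script below does.
for img in soup.find_all("img"):
    print(img.get("src"))

The gist below applies the same pattern, but drills down through the specific div/ol/li structure of the textbookcentre.com catalogue and downloads each book's cover image.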
import os
import urllib.request

from bs4 import BeautifulSoup


class Scrapper():

    def initializeScrapping(self, url):
        # Set the url of the page you want to scrape for data
        urlpage = url
        # Open the page using urllib
        page = urllib.request.urlopen(urlpage)
        # Parse the webpage
        soup = BeautifulSoup(page, 'html.parser')
        # Get the page data from the div with a class of "prod-list-view"
        product_list = soup.find('div', class_="prod-list-view")
        # Traverse the DOM down to the individual product <li> elements
        items = product_list.find('section')
        book_list = items.find('ol', class_="product-list row")
        book_data = book_list.find_all('li', class_='col-xs-6 col-md-3')
        number_of_books = len(book_data)
        self.startScrapping(number_of_books, book_data)

    def startScrapping(self, items, book_data):
        # Get the current working directory
        current_directory = os.getcwd()
        # Create a folder named "books" to store the scraped images
        path = os.path.join(current_directory, "books")
        self.createDirectory(path)
        counter = 1
        # Loop through the product list
        for book in book_data:
            try:
                product = book.find('div', class_="product")
                url = product.find('a')
                full_url = url.get('href')
                page = urllib.request.urlopen('https://textbookcentre.com' + full_url)
                soup = BeautifulSoup(page, 'html.parser')
                data = soup.find('article', class_='product_page')
                image = data.find('div', id='product-images')
                image = image.find('a')
                image_url = image.get('href')
                # Get the title of the book so that each image is saved under its title
                title_data = data.find('div', class_='col-sm-6 product_main')
                title = title_data.find('h1')
                fullpath = os.path.join(path, title.text + ".jpg")
                # Save the book cover image
                urllib.request.urlretrieve('https://textbookcentre.com' + image_url, fullpath)
                if counter == items:
                    print('INFO: finished')
                    return counter
                else:
                    print('INFO: saved {} {}'.format(title.text, counter))
                    counter += 1
            except Exception as e:
                print('ERROR:', e)

    def createDirectory(self, path):
        # Create the directory that will hold the images
        try:
            os.mkdir(path)
        except Exception as e:
            print("Creation of the directory failed", e)
        else:
            print("Successfully created the directory %s" % path)


def main():
    url = "https://textbookcentre.com/catalogue/category/text-books/primary-school/"
    scrapper = Scrapper()
    scrapper.initializeScrapping(url)


if __name__ == "__main__":
    main()
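Two caveats worth noting as possible improvements (these are suggestions, not part of the original gist): the raw book title is used as the file name, so a title containing a character such as '/' will make urlretrieve fail, and the loop fires requests back to back with no pause. A small sanitising helper addresses the first; safe_filename below is a hypothetical name, not something the script defines.

import re

def safe_filename(title):
    # Keep letters, digits, spaces, hyphens and underscores; replace anything
    # else (e.g. '/' or ':') so the title is safe to use as a file name.
    return re.sub(r'[^\w\s-]', '_', title).strip()

# Example: "Maths/Science: Grade 4" becomes "Maths_Science_ Grade 4"
print(safe_filename("Maths/Science: Grade 4"))

Inside startScrapping, the save step could then build the path as os.path.join(path, safe_filename(title.text) + '.jpg'), and a time.sleep(1) between iterations keeps the scraper polite to the server.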