LCPallares · August 5, 2024 22:40
diff --git a/amazon-bestsellers-scraper.py b/amazon-bestsellers-scraper.py
 import requests
 from bs4 import BeautifulSoup
 import csv
 from datetime import datetime


 def scrape_amazon_bestsellers():
    """Scrape the Amazon books best-sellers page into a list of records.

    Downloads the page with ``requests``, parses it with BeautifulSoup and
    builds one dict for every ``div`` whose id is ``gridItemRoot``.

    Returns:
        list[dict]: one dict per item with the keys ``rank``, ``title``,
        ``author``, ``price`` (float), ``score`` (float), ``type_cover``,
        ``numbers_reviews`` (int) and ``date_scraped`` (``YYYY-MM-DD``).
        Text fields whose element is missing are ``""``; a missing price or
        score is ``0.0``; a missing or non-numeric review count is ``0``.

    Raises:
        requests.RequestException: on a network failure, a timeout or an
            HTTP error status.
        ValueError: if a price or score text cannot be converted to float.
    """

    def text_of(node, default=""):
        # find() returns None when nothing matches; fall back to a default
        # instead of crashing on ``None.text``.
        return node.text if node is not None else default

    url = "https://www.amazon.com/best-sellers-books-Amazon/zgbs/books/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Bound the wait and fail loudly on an error status rather than parsing
    # an error page as if it were the best-sellers list.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    # Parse the body of the response object (the ``requests`` module itself
    # has no ``content`` attribute).
    soup = BeautifulSoup(response.content, 'html.parser')

    # Same date for every record of this run.
    date_scraped = datetime.now().strftime("%Y-%m-%d")

    books = []
    for item in soup.find_all("div", id="gridItemRoot"):
        rank = text_of(item.find("span", class_="zg-bdg-text")).strip().replace("#", "")

        # The title is taken from the second "a-link-normal" anchor.
        links = item.find_all("a", class_="a-link-normal")
        title = links[1].text.strip() if len(links) > 1 else ""

        author = text_of(item.find("div", class_="a-row a-size-small")).strip()

        # Keep only the leading number of the rating text.
        score = text_of(item.find("span", class_="a-icon-alt"), "0.0").split(" ")[0]

        price_node = item.find("span", class_="_cDEzb_p13n-sc-price_3mJ9Z")
        if price_node is not None:
            # Drop the leading currency symbol and any thousands separators
            # so that float() accepts the value.
            price = price_node.text.strip()[1:].replace(",", "")
        else:
            price = "0.0"

        type_cover = text_of(
            item.find("span", class_="a-size-small a-color-secondary a-text-normal")
        )

        # NOTE(review): this takes the first span carrying the "a-size-small"
        # class; confirm against the live markup that it is the review count.
        reviews_text = text_of(item.find("span", class_="a-size-small"))
        reviews_text = reviews_text.strip().replace(",", "")
        numbers_reviews = int(reviews_text) if reviews_text.isdigit() else 0

        books.append({
            "rank": rank,
            "title": title,
            "author": author,
            "price": float(price),
            "score": float(score),
            "type_cover": type_cover,
            "numbers_reviews": numbers_reviews,
            "date_scraped": date_scraped,
        })

    # Echo the scraped records to stdout, as the original did.
    print(books)
    return books


 def save_to_csv(books, filename):
    """Write scraped book records to a CSV file.

    Args:
        books: list of dicts; the keys of the first dict become the CSV
            header and column order.
        filename: path of the CSV file to create (overwritten if present).

    An empty ``books`` list has no record to derive a header from, so
    nothing is written and an existing file is left untouched.
    """
    if not books:
        return

    fieldnames = list(books[0].keys())
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(books)


 if __name__ == "__main__":
    # Script entry point: scrape once, persist the records, report the count.
    scraped_books = scrape_amazon_bestsellers()
    save_to_csv(scraped_books, "amazon_bestsellers.csv")
    book_count = len(scraped_books)
    print(f"Scraped {book_count} books and saved to amazon_bestsellers.csv")
diff --git a/analyst_amazon_bestsellers.ipynb b/analyst_amazon_bestsellers.ipynb
	import requests
	from bs4 import BeautifulSoup
	import csv
	from datetime import datetime


	def scrape_amazon_bestsellers():
	    """Scrape the Amazon books best-sellers page into a list of records.

	    Downloads the page with ``requests``, parses it with BeautifulSoup and
	    builds one dict for every ``div`` whose id is ``gridItemRoot``.

	    Returns:
	        list[dict]: one dict per item with the keys ``rank``, ``title``,
	        ``author``, ``price`` (float), ``score`` (float), ``type_cover``,
	        ``numbers_reviews`` (int) and ``date_scraped`` (``YYYY-MM-DD``).
	        Text fields whose element is missing are ``""``; a missing price or
	        score is ``0.0``; a missing or non-numeric review count is ``0``.

	    Raises:
	        requests.RequestException: on a network failure, a timeout or an
	            HTTP error status.
	        ValueError: if a price or score text cannot be converted to float.
	    """

	    def text_of(node, default=""):
	        # find() returns None when nothing matches; fall back to a default
	        # instead of crashing on ``None.text``.
	        return node.text if node is not None else default

	    url = "https://www.amazon.com/best-sellers-books-Amazon/zgbs/books/"
	    headers = {
	        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	    }

	    # Bound the wait and fail loudly on an error status rather than parsing
	    # an error page as if it were the best-sellers list.
	    response = requests.get(url, headers=headers, timeout=30)
	    response.raise_for_status()

	    # Parse the body of the response object (the ``requests`` module itself
	    # has no ``content`` attribute).
	    soup = BeautifulSoup(response.content, 'html.parser')

	    # Same date for every record of this run.
	    date_scraped = datetime.now().strftime("%Y-%m-%d")

	    books = []
	    for item in soup.find_all("div", id="gridItemRoot"):
	        rank = text_of(item.find("span", class_="zg-bdg-text")).strip().replace("#", "")

	        # The title is taken from the second "a-link-normal" anchor.
	        links = item.find_all("a", class_="a-link-normal")
	        title = links[1].text.strip() if len(links) > 1 else ""

	        author = text_of(item.find("div", class_="a-row a-size-small")).strip()

	        # Keep only the leading number of the rating text.
	        score = text_of(item.find("span", class_="a-icon-alt"), "0.0").split(" ")[0]

	        price_node = item.find("span", class_="_cDEzb_p13n-sc-price_3mJ9Z")
	        if price_node is not None:
	            # Drop the leading currency symbol and any thousands separators
	            # so that float() accepts the value.
	            price = price_node.text.strip()[1:].replace(",", "")
	        else:
	            price = "0.0"

	        type_cover = text_of(
	            item.find("span", class_="a-size-small a-color-secondary a-text-normal")
	        )

	        # NOTE(review): this takes the first span carrying the "a-size-small"
	        # class; confirm against the live markup that it is the review count.
	        reviews_text = text_of(item.find("span", class_="a-size-small"))
	        reviews_text = reviews_text.strip().replace(",", "")
	        numbers_reviews = int(reviews_text) if reviews_text.isdigit() else 0

	        books.append({
	            "rank": rank,
	            "title": title,
	            "author": author,
	            "price": float(price),
	            "score": float(score),
	            "type_cover": type_cover,
	            "numbers_reviews": numbers_reviews,
	            "date_scraped": date_scraped,
	        })

	    # Echo the scraped records to stdout, as the original did.
	    print(books)
	    return books


	def save_to_csv(books, filename):
	    """Write scraped book records to a CSV file.

	    Args:
	        books: list of dicts; the keys of the first dict become the CSV
	            header and column order.
	        filename: path of the CSV file to create (overwritten if present).

	    An empty ``books`` list has no record to derive a header from, so
	    nothing is written and an existing file is left untouched.
	    """
	    if not books:
	        return

	    fieldnames = list(books[0].keys())
	    # newline='' lets the csv module control line endings itself.
	    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
	        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
	        writer.writeheader()
	        writer.writerows(books)


	if __name__ == "__main__":
	    # Script entry point: scrape once, persist the records, report the count.
	    scraped_books = scrape_amazon_bestsellers()
	    save_to_csv(scraped_books, "amazon_bestsellers.csv")
	    book_count = len(scraped_books)
	    print(f"Scraped {book_count} books and saved to amazon_bestsellers.csv")