Created
May 23, 2023 19:44
-
-
Save eliac7/d18a80bd308d0caf36f5f47980ab0765 to your computer and use it in GitHub Desktop.
Python script for scraping product data from a website using Selenium WebDriver. It automates the search process for a list of product codes on https://www.plaisio.gr/ and saves the results in a CSV file. The script utilizes Chrome WebDriver and includes error handling for cases where no search results are found. The code is organized into functions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Plaisio product codes (SKUs) to look up, one search per code.
# Each code is fed into the site's search box as-is; results land in
# product_results.csv in the same order.
CODES = [
    3991016, 3990583, 4176618, 4176596, 3853780, 4079914, 4080688, 4080718, 4172930, 4006402,
    3860795, 4006399, 3989178, 4108108, 4111656, 3578224, 4141210, 3903907, 4234413, 3846229,
    3820912, 3177998, 4033213, 3530795, 4048350, 3530728, 3522385, 3843378, 3731448, 4018567,
    3808769, 4148525, 3889262, 4111648, 3935450, 3919161, 3787591, 3023966, 2488167, 2049546,
    2417405, 1352407, 3899330, 3899373, 3899438, 4186060, 4180747, 4180739, 4180720, 4101162,
    4101170, 4100700, 3794644, 3862135, 4238745, 4238737, 3950778, 3950743, 3901793, 3885690
]
def scrape_product_data(driver, codes):
    """Search plaisio.gr for each product code and write the results to CSV.

    For every code in *codes*, types the code into the site's search box,
    waits for the results panel, and records the first hit's title, link and
    current price. Codes with no results (or a timed-out results panel) get
    an empty row rather than aborting the run.

    Args:
        driver: A started Selenium WebDriver instance.
        codes: Iterable of product codes (ints or strings).

    Side effects:
        Overwrites ``product_results.csv`` in the working directory.
    """
    with open("product_results.csv", "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["code", "product_title", "product_link", "product_price"])  # Header

        driver.get("https://www.plaisio.gr/")
        # One wait object is enough; it is stateless between .until() calls.
        wait = WebDriverWait(driver, 10)

        print(f"Searching for {len(codes)} products...")

        for code in codes:
            # Default to empty fields; they are only filled on a successful scrape.
            product_title = product_link = product_price = ""
            try:
                # Re-locate the search box on every iteration: pressing RETURN
                # navigates the page, which makes any previously held element
                # reference stale.
                search_input = driver.find_element(By.CLASS_NAME, "search-input")
                search_input.clear()
                search_input.send_keys(str(code))
                search_input.send_keys(Keys.RETURN)

                # Wait until the search results container is visible. Kept
                # inside the try so a timeout (no results page) yields an
                # empty row instead of crashing the whole run.
                search_results = wait.until(
                    EC.visibility_of_element_located(
                        (By.CLASS_NAME, "search-results__articles.border-right")
                    )
                )

                # First search result: title, link, and current price.
                first_li = search_results.find_element(By.TAG_NAME, "li")
                product_title = first_li.find_element(By.CLASS_NAME, "description").find_element(By.CLASS_NAME, "ais-Highlight").text
                product_link = first_li.find_element(By.TAG_NAME, "a").get_attribute("href")
                product_price_element = first_li.find_element(By.CLASS_NAME, "prices")
                product_price = product_price_element.find_element(By.CLASS_NAME, "current-price").text
            except (NoSuchElementException, TimeoutException):
                # No results found (or the page layout changed): keep the
                # pre-set empty fields so the code still appears in the CSV.
                pass

            writer.writerow([code, product_title, product_link, product_price])
def main():
    """Launch a Chrome WebDriver, scrape all product codes, and clean up.

    The browser is always quit — even if scraping raises — so no Chrome
    process is left running on failure.
    """
    # webdriver_manager downloads a matching chromedriver automatically.
    driver = webdriver.Chrome(ChromeDriverManager().install())
    driver.maximize_window()
    try:
        scrape_product_data(driver, CODES)
    finally:
        # Close the browser regardless of scraping success.
        driver.quit()
# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment