Skip to content

Instantly share code, notes, and snippets.

@nijave
Last active February 14, 2025 01:15
Show Gist options
  • Save nijave/ca87709516d622910ce66940a4e4d393 to your computer and use it in GitHub Desktop.
serverpartdeals.com $/TB (price per decimal terabyte of listed hard drives)
import re
import niquests
import sys
from bs4 import BeautifulSoup
def parse_page(soup):
    """Print one "$/TB  size  price  title" line per drive listing in *soup*.

    *soup* is a parsed serverpartdeals.com collection page. Capacity is
    pulled from the product title (e.g. "12TB"); price comes from the
    regular-price element, falling back to the sale-price element.
    Listings with no parseable capacity or no price element are reported
    on stderr and skipped instead of aborting the whole crawl.
    """
    for drive in soup.find_all(class_="boost-pfs-filter-product-bottom"):
        product_title = drive.find(class_="boost-pfs-filter-product-item-title").text
        # Capacity is embedded in the title text, e.g. "... 12TB ...".
        size_match = re.search(r"[0-9.]+(?=TB)", product_title)
        if size_match is None:
            print("xxx xxx", product_title, file=sys.stderr)
            continue
        drive_size = float(size_match.group(0))
        # Prefer the regular price; fall back to the sale price element.
        price_el = drive.find(class_="boost-pfs-filter-product-item-regular-price")
        if price_el is None:
            price_el = drive.find(class_="boost-pfs-filter-product-item-sale-price")
        if price_el is None:
            # Neither price element present — previously an uncaught
            # AttributeError; skip the listing instead.
            print("no price", product_title, file=sys.stderr)
            continue
        # Strip "$" and thousands separators: "$1,099.99" would otherwise
        # raise ValueError in float().
        drive_price = float(price_el.text.replace("$", "").replace(",", "").strip())
        print(round(drive_price / drive_size, 2), drive_size, drive_price, product_title)
def crawl():
    """Fetch every paginated hard-drive listing page and feed it to parse_page().

    Pagination stops when the page no longer renders a "→" (next page)
    anchor. Progress is logged to stderr so stdout stays clean for the
    per-drive price lines.
    """
    listing_url = "https://serverpartdeals.com/collections/hard-drives"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0",
        "Accept-Language": "en-US,en;q=0.5",
    }
    page_number = 1
    has_next = True
    while has_next:
        print("Getting page", page_number, file=sys.stderr)
        response = niquests.get(
            listing_url,
            params={"display": "list", "page": page_number},
            headers=request_headers,
        )
        soup = BeautifulSoup(response.content, features="html.parser")
        parse_page(soup)
        # A "→" anchor is rendered only while more pages remain.
        has_next = soup.find("a", string="→") is not None
        page_number += 1
# Entry-point guard: run the crawler only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    crawl()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment