Skip to content

Instantly share code, notes, and snippets.

@MobCat
Created November 13, 2023 01:49
Show Gist options
  • Save MobCat/545c8c4eb5a46067033bb8367f3c73cd to your computer and use it in GitHub Desktop.
Save MobCat/545c8c4eb5a46067033bb8367f3c73cd to your computer and use it in GitHub Desktop.
arcadeartwork.org box art scraper
#! arcadeartwork.org box art scraper
#!/env/Python3.10.4
#/MobCat (2023)
import requests #pip install requests
import re
import os
# Not all consoles on the site are listed here; there are too many categories and sub-categories.
# To add a new one, open the album you want to download, e.g. `https://www.arcadeartwork.org/index.php?/category/547`,
# then open the first image in a new tab, scroll all the way to the bottom and open the last image in a new tab.
# You should now have two URLs like
# https://www.arcadeartwork.org/picture.php?/83435/category/547
# https://www.arcadeartwork.org/picture.php?/83994/category/547
# We want the first number after the `?/` in each URL.
# These two numbers say where to start scraping and where to end scraping (both ids are included).
# Example entry -> 3: (83435, 83994, "NES", "Nintendo Entertainment System")
# Entry format  -> Select index: (start range, end range, "FolderName", "Menu Name")
# Scrape targets, keyed by the number the user types at the menu prompt.
# Each value is (first image id, last image id, "FolderName", "Menu Name").
# The folder name doubles as the output directory, so it must be unique per
# console; otherwise two consoles would download into the same directory and
# overwrite each other's files.
consoles = {
    1: (145820, 146377, "3DO", "3DO"),
    2: (56317, 56354, "Bally", "Bally Astrocade"),
    3: (83435, 83994, "NES", "Nintendo Entertainment System"),
    4: (27793, 28353, "SNES", "Super Nintendo Entertainment System"),
    5: (82229, 82527, "N64", "Nintendo 64"),
    6: (32285, 36354, "NDS", "Nintendo DS"),
    7: (101717, 101738, "VB", "Nintendo Virtual Boy"),
    8: (43108, 43675, "GB", "Nintendo Game Boy"),
    9: (80976, 81548, "GBC", "Nintendo Game Boy Color"),
    10: (56393, 58719, "GBA", "Nintendo Game Boy Advance"),
    # GameCube used to reuse the "GBA" folder (copy-paste slip), which mixed
    # its box art into the Game Boy Advance directory.
    11: (55740, 56316, "GC", "Nintendo GameCube"),
}
# Menu builder: print the banner, then one numbered line per known console.
print("""
arcadeartwork.org box art scraper
By MobCat
20231113
Please select a console
""")
for menu_number, (first_id, last_id, _folder, menu_name) in consoles.items():
    # Both the first and the last id are downloaded, hence the +1.
    box_count = (last_id - first_id) + 1
    print(f"{menu_number}: {menu_name} ({box_count} Boxes)")
# Ask the user which console to scrape. Ctrl+C (or a closed stdin) at the
# prompt ends the script quietly instead of dumping a traceback.
try:
    raw_selection = input("\nSelection: ")
except (KeyboardInterrupt, EOFError):
    print("Terminate the script.")
    exit()
# Anything that is not a whole number cannot be a menu key; report it with the
# same message as an out-of-range number rather than crashing on ValueError.
try:
    select = int(raw_selection)
except ValueError:
    print(f'{raw_selection} Is not a vaild type\nPlease try again')
    exit()
# Sanity checks
if select not in consoles:
    print(f'{select} Is not a vaild type\nPlease try again')
    exit()
if not os.path.exists(consoles[select][2]):
    print(f'"{consoles[select][2]}" directory not found, makeing a new one now.')
    # exist_ok guards against the directory appearing between the check above
    # and this call.
    os.makedirs(consoles[select][2], exist_ok=True)
# Set our download range to our selected console
first_id, last_id, folder = consoles[select][:3]
try:
    # Walk every image id in the inclusive range [first_id, last_id].
    for cnt in range(first_id, last_id + 1):
        # Go rip image based on id in range. The timeout keeps one stalled
        # connection from hanging the whole run.
        try:
            response = requests.get(
                f'https://www.arcadeartwork.org/action.php?id={cnt}&part=e&download',
                timeout=30,
            )
        except requests.RequestException as err:
            # A network hiccup on one image should not abort the whole scrape.
            print(f"Failed to download image. Request error: {err}")
            continue
        # Check if the request was successful (status code 200)
        if response.status_code != 200:
            print(f"Failed to download image. Status code: {response.status_code}")
            continue
        # Try to get the filename from the "Content-Disposition" header.
        # Default to '' so a missing header fails the match instead of
        # raising TypeError inside re.search.
        content_disposition = response.headers.get('Content-Disposition', '')
        # Extract the filename; [^"]+ stops at the closing quote so any later
        # quoted header parameter is not swallowed into the name.
        filename_match = re.search(r'filename="([^"]+)"', content_disposition)
        if filename_match is None:
            print(f"Failed to download image. No filename in response for id {cnt}")
            continue
        # Replace runs of '-' with a single space and trim the ends.
        cleaned_filename = re.sub('-+', ' ', filename_match.group(1)).strip()
        # The name comes from the server: keep only the final path component
        # so it cannot escape the console folder (e.g. "../../x").
        cleaned_filename = os.path.basename(cleaned_filename.replace('\\', '/'))
        if not cleaned_filename:
            print(f"Failed to download image. Unusable filename for id {cnt}")
            continue
        # Save the image with the extracted filename
        with open(os.path.join(folder, cleaned_filename), 'wb') as f:
            f.write(response.content)
        print(f"Downloaded {folder}\\{cleaned_filename}")
    print("Done ^__^")
except KeyboardInterrupt:
    print("Script terminated.")
    exit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment