Created
November 13, 2023 01:49
-
-
Save MobCat/545c8c4eb5a46067033bb8367f3c73cd to your computer and use it in GitHub Desktop.
arcadeartwork.org box art scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# arcadeartwork.org box art scraper
# Python 3.10.4
# MobCat (2023)
import os
import re
import sys

import requests  # pip install requests
# Console table for the scraper.
#
# Not every console on the site is listed here; there are too many categories
# and sub-categories.
#
# To add a new one:
#   1. Open the album you want to download, e.g.
#      https://www.arcadeartwork.org/index.php?/category/547
#   2. Open the first image in a new tab, then scroll all the way to the
#      bottom and open the last image in a new tab. You now have two URLs like
#      https://www.arcadeartwork.org/picture.php?/83435/category/547
#      https://www.arcadeartwork.org/picture.php?/83994/category/547
#   3. Take the first number after the "?/" from each URL. These are the image
#      ids where scraping starts and ends (both inclusive), e.g.
#      3: (83435, 83994, "NES", "Nintendo Entertainment System")
#
# Layout: menu index: (start id, end id, "FolderName", "Menu Name")
# Each entry needs its own folder name, otherwise two consoles would be
# downloaded into the same directory.
consoles = {
    1: (145820, 146377, "3DO", "3DO"),
    2: (56317, 56354, "Bally", "Bally Astrocade"),
    3: (83435, 83994, "NES", "Nintendo Entertainment System"),
    4: (27793, 28353, "SNES", "Super Nintendo Entertainment System"),
    5: (82229, 82527, "N64", "Nintendo 64"),
    6: (32285, 36354, "NDS", "Nintendo DS"),
    7: (101717, 101738, "VB", "Nintendo Virtual Boy"),
    8: (43108, 43675, "GB", "Nintendo Game Boy"),
    9: (80976, 81548, "GBC", "Nintendo Game Boy Color"),
    10: (56393, 58719, "GBA", "Nintendo Game Boy Advance"),
    # Was ("GBA", "Nintendo Game GameCube"): a copy-paste slip that sent
    # GameCube covers into the Game Boy Advance folder.
    11: (55740, 56316, "GC", "Nintendo GameCube"),
}
# Pulls the quoted filename out of a Content-Disposition header value.
FILENAME_PATTERN = re.compile(r'filename="(.+)"')

# Seconds to wait on the server before giving up on a single image.
REQUEST_TIMEOUT = 30


def extract_filename(content_disposition):
    """Return the quoted filename from a Content-Disposition header.

    Returns None when the header is missing/empty or carries no
    filename="..." part, so the caller can skip the image instead of crashing.
    """
    if not content_disposition:
        return None
    match = FILENAME_PATTERN.search(content_disposition)
    return match.group(1) if match else None


def clean_filename(filename):
    """Turn a server-supplied filename into a safe, readable local name.

    Directory components are dropped (the name comes from a remote header and
    must not be able to escape the download folder), every run of '-' becomes
    a single space, and surrounding whitespace is stripped.

    Returns an empty string when nothing usable is left.
    """
    # Treat both separator styles as path separators regardless of host OS.
    leaf = os.path.basename(filename.replace('\\', '/'))
    cleaned = re.sub('-+', ' ', leaf).strip()
    if cleaned in ('', '.', '..'):
        return ''
    return cleaned


def download_box_art(image_id, folder):
    """Download one image by id into *folder*.

    Prints the outcome and returns True if a file was written, False if the
    request failed, returned a non-200 status, or carried no usable filename.
    """
    url = f'https://www.arcadeartwork.org/action.php?id={image_id}&part=e&download'
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # One unreachable image should not abort the whole range.
        print(f"Failed to download image. Request error: {err}")
        return False

    if response.status_code != 200:
        print(f"Failed to download image. Status code: {response.status_code}")
        return False

    raw_name = extract_filename(response.headers.get('Content-Disposition'))
    cleaned_filename = clean_filename(raw_name) if raw_name else ''
    if not cleaned_filename:
        print(f"Failed to download image. No filename returned for id {image_id}")
        return False

    with open(os.path.join(folder, cleaned_filename), 'wb') as f:
        f.write(response.content)
    print(f"Downloaded {folder}\\{cleaned_filename}")
    return True


def main():
    """Show the console menu, read a selection and download its box art.

    Returns the process exit status: 0 on completion or user interrupt,
    1 when the selection is not a valid menu entry.
    """
    # Menu builder
    print("""
arcadeartwork.org box art scraper
By MobCat
20231113
Please select a console
""")
    for index, (first_id, last_id, _folder, menu_name) in consoles.items():
        # +1 because both the start id and the end id are downloaded.
        print(f"{index}: {menu_name} ({(last_id - first_id) + 1} Boxes)")

    try:
        raw_selection = input("\nSelection: ")
    except (KeyboardInterrupt, EOFError):
        print("Terminate the script.")
        return 0

    # Sanity checks
    try:
        select = int(raw_selection)
    except ValueError:
        select = None
    if select not in consoles:
        print(f'{raw_selection.strip()} Is not a vaild type\nPlease try again')
        return 1

    first_id, last_id, folder, _menu_name = consoles[select]
    if not os.path.exists(folder):
        print(f'"{folder}" directory not found, makeing a new one now.')
        os.makedirs(folder, exist_ok=True)

    # Walk every image id in the selected console's range (inclusive).
    try:
        for image_id in range(first_id, last_id + 1):
            download_box_art(image_id, folder)
        print("Done ^__^")
    except KeyboardInterrupt:
        print("Script terminated.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment