@KevinAlavik
Last active January 9, 2025 11:53
Myrient Scraper: a simple web scraper for downloading games (ROMs) from Myrient (https://myrient.erista.me). Tweak the configuration variables to change the region and game type.
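For example, to grab USA-region Game Boy ROMs instead of European SNES ones, you might change the configuration block like this (the directory path below is illustrative; browse https://myrient.erista.me/files/No-Intro/ for the exact listing names):

url = "https://myrient.erista.me/files/No-Intro/Nintendo%20-%20Game%20Boy/"
region = "USA"
download_dir = "out-gb/"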
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, unquote
import logging
import colorlog
from concurrent.futures import ThreadPoolExecutor
from time import time
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Logging setup
handler = colorlog.StreamHandler()
formatter = colorlog.ColoredFormatter(
    "%(log_color)s%(asctime)s - %(levelname)s - %(message)s",
    log_colors={
        'DEBUG': 'white',
        'INFO': 'cyan',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'bold_red',
    }
)
handler.setFormatter(formatter)
logger = colorlog.getLogger(__name__)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
# Session setup
session = requests.Session()
retries = Retry(
    total=5,
    backoff_factor=1,
    status_forcelist=[500, 502, 503, 504]
)
adapter = HTTPAdapter(max_retries=retries)
session.mount("https://", adapter)
session.mount("http://", adapter)
# Configuration
url = "https://myrient.erista.me/files/No-Intro/Nintendo%20-%20Super%20Nintendo%20Entertainment%20System/"
search_extension = ".zip"
region_check = True    # filter downloads by region tag in the file name
region = "Europe"      # region tag to keep
world = True           # also keep "(World)" releases
chunk_size = 8192 * 4  # bytes per streamed write
download_dir = "out-snes/"
os.makedirs(download_dir, exist_ok=True)
# Pre-load completed files so re-runs skip finished downloads
completed_files = set(os.listdir(download_dir))
def fetch_links(target_url):
    """Fetch links from the given URL."""
    try:
        logger.info(f"🌐 Fetching page: {target_url}")
        response = session.get(target_url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        return [
            urljoin(target_url, unquote(a_tag["href"]))
            for a_tag in soup.find_all("a", href=True)
            if a_tag["href"].endswith(search_extension)
        ]
    except requests.exceptions.RequestException as e:
        logger.error(f"❌ Error fetching page: {e}")
        return []
def download_file(file_url):
    """Download a single file, skipping duplicates and non-matching regions."""
    local_filename = os.path.join(download_dir, os.path.basename(file_url))
    if os.path.basename(local_filename) in completed_files:
        logger.info(f"✅ Already downloaded: {local_filename}")
        return
    # Keep the file only if it carries the region tag, or if it is a
    # "(World)" release and world releases are allowed
    if region_check and (region not in local_filename and not ("(World)" in local_filename and world)):
        logger.info(f"❌ Skipping file: {local_filename}, not for region '{region}'")
        return
    try:
        logger.info(f"⬇️ Starting download: {file_url}")
        with session.get(file_url, stream=True) as response:
            response.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size):
                    if chunk:
                        f.write(chunk)
        logger.info(f"✅ Download completed: {local_filename}")
        completed_files.add(os.path.basename(local_filename))
    except requests.exceptions.RequestException as e:
        logger.error(f"❌ Error downloading file {file_url}: {e}")
def main():
    start_time = time()
    try:
        # Fetch all links
        links = fetch_links(url)
        if not links:
            logger.warning("⚠️ No links found!")
            return
        logger.info(f"🔗 Found {len(links)} links to process.")
        with ThreadPoolExecutor(max_workers=8) as executor:
            executor.map(download_file, links)
    except KeyboardInterrupt:
        logger.warning("❌ Download interrupted by user.")
    end_time = time()
    elapsed_time = end_time - start_time
    logger.info(f"⏱️ All downloads completed in {elapsed_time:.2f} seconds.")

if __name__ == "__main__":
    main()
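To run the script, install the three third-party dependencies and invoke it with Python 3 (the file name myrient_scraper.py below is just a stand-in for wherever you saved the gist); matching files land in out-snes/:

pip install requests beautifulsoup4 colorlog
python myrient_scraper.py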