Created
June 10, 2023 06:45
-
-
Save Tuhin-thinks/5e0921ed5b425603591020e9e2566517 to your computer and use it in GitHub Desktop.
Utility Python script to selectively save files from a server.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
from pathlib import Path
from urllib.parse import unquote, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# Downloaded files are mirrored into a "save" folder located next to this script.
SAVE_DIR = Path(os.path.abspath(__file__)).parent / 'save'

# Directory names the crawler is allowed to enter and download from.
TO_SAVE_DIR = [
    'Movies',
    'TV Shows',
    'Music',
    'Books',
    'Games',
    'Software',
    'Pictures',
]
def get_all_links(response_content: bytes):
    """Split the anchors of an HTML page into directory links and file links.

    Args:
        response_content: Raw HTML markup of the page (e.g. the ``content``
            of a ``requests`` response).

    Returns:
        A ``(dirs, files)`` tuple of lists holding the ``href`` values in
        document order. An href ending in ``/`` is classified as a directory,
        every other href as a file.
    """
    soup = BeautifulSoup(response_content, 'html.parser')
    dirs = []
    files = []
    for link in soup.find_all('a'):
        href = link.get('href')
        # Anchors without an href (e.g. <a name="top">) yield None, which
        # would crash on .endswith(); empty hrefs point nowhere useful either.
        if not href:
            continue
        if href.endswith('/'):
            dirs.append(href)
        else:
            files.append(href)
    return dirs, files
def get_local_path_from_url(link: str):
    """Map a URL to the local path under ``SAVE_DIR`` that mirrors its path.

    Percent-escapes are decoded so that e.g. ``TV%20Shows`` is stored as
    ``TV Shows``. Empty, ``.`` and ``..`` segments are dropped so that a
    server-supplied URL cannot point outside ``SAVE_DIR``.

    Args:
        link: URL (or bare URL path) of the remote resource.

    Returns:
        A ``pathlib.Path`` located inside ``SAVE_DIR``; ``SAVE_DIR`` itself
        when the URL has no usable path segments.
    """
    # Decode before splitting so an escaped "%2F" cannot smuggle a ".." past
    # the segment filter below.
    decoded_path = unquote(urlparse(link).path)
    segments = [
        segment
        for segment in decoded_path.split('/')
        if segment not in ('', '.', '..')
    ]
    return SAVE_DIR.joinpath(*segments)
def save_file_from_url(link: str):
    """Download ``link`` to the local path given by ``get_local_path_from_url``.

    Missing parent directories of the destination are created.

    Args:
        link: Absolute URL of the file to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is never written to disk as if it were the
            requested file.
    """
    path = get_local_path_from_url(link)
    with requests.Session() as req_session:
        # stream=True keeps the body out of memory until it is iterated;
        # without it the whole file is buffered before the first write.
        with req_session.get(link, stream=True) as resp:
            resp.raise_for_status()
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
def get_dir_name(link: str):
    """Return the name of the directory that ``link`` refers to.

    For a URL whose path ends in ``/`` this is the last path segment; for any
    other URL (treated as a file) it is the segment immediately before the
    file name. Empty segments caused by doubled slashes are ignored and the
    result is percent-decoded, so ``.../TV%20Shows/`` yields ``'TV Shows'``.

    Args:
        link: URL of a directory listing or of a file.

    Returns:
        The directory name, or ``''`` when the URL path has no directory
        component (e.g. the server root).
    """
    raw_path = urlparse(link).path
    segments = [segment for segment in raw_path.split('/') if segment]
    if not raw_path.endswith('/'):
        # The last segment is the file name, not a directory.
        segments = segments[:-1]
    return unquote(segments[-1]) if segments else ''
def main_parser(parse_address: str, parent_dir_name: str = None):
    """Crawl the listing page at ``parse_address`` and download selected files.

    Files linked from the page are downloaded only when ``parent_dir_name``
    is one of ``TO_SAVE_DIR``. Linked sub-directories are followed
    recursively only when their own name is in ``TO_SAVE_DIR``.

    Args:
        parse_address: URL of the listing page to parse.
        parent_dir_name: Name of the directory ``parse_address`` points at;
            ``None`` for the starting page, whose files are never downloaded.

    Raises:
        requests.HTTPError: If the listing page answers with a 4xx/5xx status.
    """
    # urljoin replaces the last path segment unless the base ends in '/'.
    base_url = parse_address if parse_address.endswith('/') else parse_address + '/'

    with requests.Session() as req_session:
        resp = req_session.get(parse_address)
    resp.raise_for_status()
    dirs, files = get_all_links(resp.content)

    if parent_dir_name in TO_SAVE_DIR:
        for file in files:
            file_url = urljoin(base_url, file)
            # Report the real destination, not one derived from the bare href.
            print(get_local_path_from_url(file_url))
            save_file_from_url(file_url)

    for dir_link in dirs:
        next_addr = urljoin(base_url, dir_link)
        # Only descend: links to this page, its parent ("../") or another
        # host would otherwise lead the crawl outside the current listing.
        if next_addr == base_url or not next_addr.startswith(base_url):
            continue
        dir_parent_name = get_dir_name(next_addr)
        if dir_parent_name not in TO_SAVE_DIR:
            continue
        main_parser(next_addr, dir_parent_name)
if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default server.
    address = sys.argv[1] if len(sys.argv) > 1 else "http://192.168.1.34:8080"
    main_parser(address)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment