Last active
November 21, 2021 14:14
-
-
Save ceres-c/6741b8a08765b83aef61f2b4ed4cedfb to your computer and use it in GitHub Desktop.
MediaLibraryOnline audiobook downloader - Download stream-only audiobooks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/python3 | |
import os | |
from urllib.parse import unquote | |
import httpx | |
import lxml.html | |
# Numeric unit id; it can be found in the URL as the "id" parameter.
unit_id = 100009140

# Cookie mapping sent with every request; the value is an alphanumeric string.
cookies = {'ASP.NET_SessionId': ''}

# Directory part of this script's path.
current_path = os.path.split(__file__)[0]
def create_download_folder():
    """Ensure the ``downloaded`` directory exists inside ``current_path``.

    The call is idempotent: an already existing directory is left untouched.
    Creating with ``exist_ok=True`` avoids the gap between a separate
    existence check and the creation itself.

    Raises:
        FileExistsError: If ``downloaded`` exists but is not a directory.
    """
    os.makedirs(os.path.join(current_path, 'downloaded'), exist_ok=True)
def get_chapters(cookies, unit_id):
    """Fetch the player page of a unit and return its chapter locations.

    Args:
        cookies: Cookie mapping sent with the request.
        unit_id: Numeric unit id, sent to the player as the ``unid`` parameter.

    Returns:
        A list holding the ``data-src`` attribute of every chapter link found
        in the page, in the order the XPath query returns them.

    Raises:
        Exception: If the server answers with an HTTP error status, or if no
            chapter link is found in the page.
    """
    url = 'https://bergamo.medialibrary.it/media/mmPlayer.aspx'
    params = (
        ('unid', unit_id),
        ('type', '720'),  # Can't be modified, otherwise will get some error about javascript not being supported
        ('sbox', 1),  # Same as above
    )
    res = httpx.get(url, cookies=cookies, params=params)
    # Check the status explicitly instead of relying on the truthiness of the
    # response object, which does not reflect the HTTP status.
    if res.is_error:
        raise Exception(f"Couldn't download chapter data, got HTTP error {res.status_code}")

    root = lxml.html.fromstring(res.text)
    chapters = root.xpath('//div[@class="trackscontainer hidden"]/ol/*/a')
    if not chapters:
        raise Exception('Did not find any chapter. Is the unit id correct?')

    return [chapter.get('data-src') for chapter in chapters]
def download_chapter(cookies, index, aspx_url):
    """Download one chapter and store it in the ``downloaded`` folder.

    The file is saved as ``"<index> - <name>"``, where ``<name>`` is the
    percent-decoded text after the last ``/`` of the response URL.

    Args:
        cookies: Cookie mapping sent with the request.
        index: Number used as the file name prefix.
        aspx_url: Chapter location, appended to the site's ``/media/`` URL.

    Raises:
        Exception: If the server answers with an HTTP error status.
        OSError: If the output file cannot be opened or written.
    """
    url = f'https://bergamo.medialibrary.it/media/{aspx_url}'
    # The whole body is held in RAM; files are assumed to be small enough.
    res = httpx.get(url, cookies=cookies)
    if res.is_error:
        # Fail loudly instead of saving an error page as if it were a chapter.
        raise Exception(f"Couldn't download chapter {index}, got HTTP error {res.status_code}")

    filename = unquote(str(res.url).rsplit('/', 1)[-1])
    # Poor man's sanitization: drop parent-directory markers and path separators.
    safe_name = filename.replace('..', '').replace('/', '').replace('\\', '')
    local_path = os.path.join(current_path, 'downloaded', f'{index} - {safe_name}')

    with open(local_path, 'wb') as out_file:
        out_file.write(res.content)
# Script driver: fetch the chapter list, make sure the output folder exists,
# then download every chapter in order, numbering the files from 1.
chapters = get_chapters(cookies, unit_id)
create_download_folder()
total = len(chapters)
for number, chapter_url in enumerate(chapters, start=1):
    # '\r' keeps the progress on a single terminal line; flush explicitly
    # because no newline is written to trigger line-buffered output.
    print(f'Downloading file {number}/{total}', end='\r', flush=True)
    download_chapter(cookies, number, chapter_url)
print()  # Terminate the progress line so the shell prompt does not overwrite it.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment