Skip to content

Instantly share code, notes, and snippets.

@gabrielfeo
Last active January 24, 2025 19:20
Show Gist options
  • Save gabrielfeo/10e2f819e8b5c98fe3cf3534a451f6c6 to your computer and use it in GitHub Desktop.
Save gabrielfeo/10e2f819e8b5c98fe3cf3534a451f6c6 to your computer and use it in GitHub Desktop.
A script to download assets from an unintuitive University Of Coimbra file-hosting website.
#!/usr/bin/env python3
'''
Downloads requested assets based on a JSON file that lists a directory tree.
USAGE:
./download-deec-assets.py --json-file deec-assets.json --dir-name '[E2]Eletronica_2'
# or
./download-deec-assets.py --json-file deec-assets.json --dir-id '0fx80'
To obtain the JSON file, watch requests with the browser web inspector when first opening
https://my.deec.uc.pt/media/material. **Make sure to open it right before running the script**
(otherwise the server responds to API requests with HTTP 504 for whatever reason).
The JSON file is formatted as in this example (beginning of the file):
```json
{
"fileMap": {
"55964": {
"id": "55964",
"name": "Aulas",
"modDate": "2024-02-27T15:35:56.898Z",
"isDir": true,
"childrenCount": 2,
"childrenIds": [
"55al4",
"55al5"
],
"parentId": "f9qs"
},
"55974": {
"id": "55974",
"name": "testes",
"modDate": "2024-02-27T15:39:59.170Z",
"isDir": true,
"childrenCount": 1,
"childrenIds": [
"55aog"
],
"parentId": "fcpv"
},
"55975": {
"id": "55975",
"name": "aulas",
"modDate": "2024-02-27T15:40:10.882Z",
"isDir": true,
"childrenCount": 5,
"childrenIds": [
"55aob",
"55aof",
"55aoc",
"55aod",
"55aoe"
],
"parentId": "fcyg"
},
```
Prompt this script was based on (for future reference):
The script requires a `dir` arg, which is the name of a directory. It then parses
for a single element in `fileMap` that has the same `name` value as the `dir` arg.
Then, it recurses all the children of that element, based on the `childrenIds` property,
and downloads each file that is not a directory, as indicated by `isDir` of each element.
Files are downloaded with a `curl -LO https://my.deec.uc.pt/api/assets/explorer/<id>` command,
where `<id>` is the `id` property of each element, started via `subprocess`. Files are
downloaded concurrently.
'''
import json
import sys
import subprocess
import argparse
from pathlib import Path
# Base endpoint of the asset API; a file's ID is appended to form its URL.
ASSETS_URL = "https://my.deec.uc.pt/api/assets/explorer"

# Handles of every curl process started so far. Filled while the tree is
# traversed and waited on at the end of the script.
downloads: list = []
def download_file(element, output_dir):
    """Start a background ``curl`` process that downloads a single file.

    Args:
        element: File entry from ``fileMap``. Its ``id`` selects the remote
            asset and its ``name`` is used as the local file name.
        output_dir: ``pathlib.Path`` of the directory the file is saved into.

    Returns:
        The ``subprocess.Popen`` handle of the started ``curl`` process. The
        download runs concurrently; the caller must wait on the handle.
    """
    url = f"{ASSETS_URL}/{element['id']}"
    # The name comes from the server's listing. Keep only its last path
    # component so that separators or '..' cannot place the file outside of
    # output_dir; fall back to the ID when nothing usable is left.
    file_name = Path(element['name']).name
    if file_name in ('', '.', '..'):
        file_name = str(element['id'])
    print(f"Downloading file: {element['name']} (ID: {element['id']})...")
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page under the asset's name.
    return subprocess.Popen(
        ["curl", "--fail", "-sSL", url, "-o", str(output_dir / file_name)]
    )
def require_single_directory(file_map, dir_name):
    """Return the only directory entry named ``dir_name``, or exit.

    Args:
        file_map: The ``fileMap`` mapping from the JSON file (ID -> entry).
        dir_name: Exact name of the directory to look for.

    Returns:
        The single entry that is a directory and whose ``name`` equals
        ``dir_name``.

    Raises:
        SystemExit: With status 1, after printing an error, when no directory
            or more than one directory carries that name.
    """
    matching_dirs = [
        element for element in file_map.values()
        if element.get('name') == dir_name and is_dir(element)
    ]
    if len(matching_dirs) == 1:
        return matching_dirs[0]
    if not matching_dirs:
        print(f"Error: Found no directory named '{dir_name}' in the file tree. Pass the exact name of the directory, or pass `--dir-id` instead. See --help.")
    else:
        print(f"Error: Found more than one directory named '{dir_name}' in the file tree. You may pass `--dir-id` instead. See --help.")
    sys.exit(1)
def is_dir(element):
    """Tell whether a ``fileMap`` entry is flagged as a directory.

    Returns the entry's ``isDir`` value, or ``False`` when the key is absent.
    """
    return element.get('isDir', False)
def download_assets(file_map, dir_id, output_dir):
    """Recursively start downloads for every file below a directory.

    Every non-directory descendant of the directory is handed to
    ``download_file`` and the resulting process handle is appended to the
    module-level ``downloads`` list. All files are saved directly into
    ``output_dir``.

    Args:
        file_map: The ``fileMap`` mapping from the JSON file (ID -> entry).
        dir_id: ID of the directory to traverse.
        output_dir: ``pathlib.Path`` of the directory files are saved into.
    """
    # Entries are normally keyed by their own ID, so try a direct lookup
    # first and only scan the whole map when that does not yield the
    # directory.
    directory = file_map.get(dir_id)
    if directory is None or directory.get('id') != dir_id or not is_dir(directory):
        directory = next(
            (entry for entry in file_map.values()
             if entry.get('id') == dir_id and is_dir(entry)),
            None,
        )
    if directory is None:
        print(f"Warning: No directory with ID '{dir_id}' in the file tree; nothing to download.", file=sys.stderr)
        return

    print(f"Traversing directory: {directory['name']} (ID: {directory['id']})")
    # A directory entry may lack 'childrenIds'; treat that as empty.
    for child_id in directory.get('childrenIds', []):
        child = file_map.get(child_id)
        if child is None:
            # The listing may be partial; skip children it does not
            # describe instead of aborting with a KeyError.
            print(f"Warning: Child '{child_id}' of directory '{directory['name']}' is not in the file tree; skipping.", file=sys.stderr)
            continue
        if is_dir(child):
            download_assets(file_map, child['id'], output_dir)
        else:
            downloads.append(download_file(child, output_dir))
def main():
    """Parse the command line, start all downloads and wait for them.

    Exits with status 1 when the arguments are inconsistent, when the
    requested directory cannot be resolved, or when any download process
    finishes with a non-zero exit code.
    """
    parser = argparse.ArgumentParser(description='Download requested assets based on a JSON file that lists a directory tree.')
    parser.add_argument('--output-dir', type=Path, default=Path('downloads'), help='Output directory to save the files')
    parser.add_argument('--json-file', type=str, required=True, help='Path to the JSON file')
    parser.add_argument('--dir-name', type=str, help='Pass either this or --dir-id. Name of the directory to download')
    parser.add_argument('--dir-id', type=str, help="Pass either this or --dir-name. ID of the directory to download. To obtain the ID of the directory, open the browser developer tools, focus on the directory's icon and copy the value of its `data-chonky-file-id` attribute from the HTML. Note that the directory may consist of multiple nested divs, so make sure to look for the ID in all of them.")
    args = parser.parse_args()

    # Validate the arguments before touching the file system.
    if bool(args.dir_name) == bool(args.dir_id):
        print("Error: Pass exactly one of `--dir-name` or `--dir-id`. See --help.")
        sys.exit(1)

    # Read as UTF-8 explicitly so that non-ASCII names do not depend on the
    # platform's default encoding.
    with open(args.json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    file_map = data['fileMap']

    # Resolve the directory first so that a failed lookup does not leave an
    # empty output directory behind.
    dir_id = args.dir_id or require_single_directory(file_map, args.dir_name)['id']
    args.output_dir.mkdir(parents=True, exist_ok=True)
    download_assets(file_map, dir_id, args.output_dir)

    # Wait for every curl process and count the ones that failed.
    failed = sum(1 for download in downloads if download.wait() != 0)
    if failed:
        print(f"Error: {failed} of {len(downloads)} downloads failed.", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment