gabrielfeo · January 24, 2025 19:20
diff --git a/download-deec-assets.py b/download-deec-assets.py
 #!/usr/bin/env python3

 '''
 Downloads requested assets based on a JSON file that lists a directory tree.

 USAGE:
    ./download-deec-assets.py --json-file deec-assets.json --dir-name '[E2]Eletronica_2'
    # or
    ./download-deec-assets.py --json-file deec-assets.json --dir-id '0fx80'

 To obtain the JSON file, watch requests with the browser web inspector when first opening
 https://my.deec.uc.pt/media/material. **Make sure to open it right before running the script**
 (otherwise the server responds to API requests with HTTP 504 for whatever reason).

 The JSON file is formatted as in this example (beginning of the file):
 ```json
 {
    "fileMap": {
        "55964": {
            "id": "55964",
            "name": "Aulas",
            "modDate": "2024-02-27T15:35:56.898Z",
            "isDir": true,
            "childrenCount": 2,
            "childrenIds": [
                "55al4",
                "55al5"
            ],
            "parentId": "f9qs"
        },
        "55974": {
            "id": "55974",
            "name": "testes",
            "modDate": "2024-02-27T15:39:59.170Z",
            "isDir": true,
            "childrenCount": 1,
            "childrenIds": [
                "55aog"
            ],
            "parentId": "fcpv"
        },
        "55975": {
            "id": "55975",
            "name": "aulas",
            "modDate": "2024-02-27T15:40:10.882Z",
            "isDir": true,
            "childrenCount": 5,
            "childrenIds": [
                "55aob",
                "55aof",
                "55aoc",
                "55aod",
                "55aoe"
            ],
            "parentId": "fcyg"
        },
 ```

 Prompt this script was based on (for future reference):

 The script requires a `dir` arg, which is the name of a directory. It then parses
 for a single element in `fileMap` that has the same `name` value as the `dir` arg.
 Then, it recurses all the children of that element, based on the `childrenIds` property,
 and downloads each file that is not a directory, as indicated by `isDir` of each element.
 Files are downloaded with a `curl -LO https://my.deec.uc.pt/api/assets/explorer/<id>` command,
 where `<id>` is the `id` property of each element, started via `subprocess`. Files are
 downloaded concurrently.
 '''

 import json
 import sys
 import subprocess
 import argparse
 from pathlib import Path

 # Base URL of the asset endpoint; a file's ID is appended to it to form the download URL.
 ASSETS_URL = 'https://my.deec.uc.pt/api/assets/explorer'

 # Handles of the curl processes started so far; the script waits on each of them at the end.
 downloads = []

 def download_file(element, output_dir):
    '''Start a background ``curl`` download of a single asset.

    Args:
        element: A ``fileMap`` entry for a file. Its ``id`` selects the remote
            asset and its ``name`` is used as the local file name.
        output_dir: Directory to save the file into. Must support the ``/``
            path-join operator (the script passes a ``pathlib.Path``).

    Returns:
        The ``subprocess.Popen`` handle of the started ``curl`` process. The
        download runs concurrently, so the caller has to ``wait()`` on it.
    '''
    url = f"{ASSETS_URL}/{element['id']}"
    print(f"Downloading file: {element['name']} (ID: {element['id']})...")
    destination = output_dir / element['name']
    # --fail makes curl exit non-zero on an HTTP error response instead of
    # silently saving the server's error page under the asset's file name.
    return subprocess.Popen(["curl", "--fail", "-sSL", url, "-o", str(destination)])

 def require_single_directory(file_map, dir_name):
    '''Return the only directory entry named ``dir_name``, or exit with an error.

    Args:
        file_map: Mapping of the JSON ``fileMap``; its values are entries with
            at least a ``name`` key.
        dir_name: Exact name of the directory to look for.

    Returns:
        The single matching entry whose ``name`` equals ``dir_name`` and that
        is a directory.

    Raises:
        SystemExit: With status 1, after printing an explanation, when zero or
            more than one directory has that name.
    '''
    matching_dirs = [
        element for element in file_map.values()
        if element['name'] == dir_name and is_dir(element)
    ]
    if len(matching_dirs) == 1:
        return matching_dirs[0]
    if not matching_dirs:
        print(f"Error: Found no directory named '{dir_name}' in the file tree. Pass the exact name of the directory, or pass `--dir-id` instead. See --help.")
    else:
        print(f"Error: Found more than one directory named '{dir_name}' in the file tree. You may pass `--dir-id` instead. See --help.")
    sys.exit(1)

 def is_dir(element):
    '''Tell whether a ``fileMap`` entry is flagged as a directory.

    Returns the entry's ``isDir`` value, or ``False`` when the key is absent.
    '''
    return element.get('isDir', False)

 def download_assets(file_map, dir_id, output_dir):
    '''Start downloads for every file beneath the directory with ID ``dir_id``.

    Picks the first ``fileMap`` entry that has this ID and is a directory, then
    walks its ``childrenIds``: sub-directories are traversed recursively, and
    each file is passed to ``download_file``, whose process handle is appended
    to the module-level ``downloads`` list. Does nothing when no directory
    entry has that ID. ``output_dir`` is passed on unchanged at every level.
    '''
    directory = next(
        (entry for entry in file_map.values() if entry['id'] == dir_id and is_dir(entry)),
        None,
    )
    if directory is None:
        return
    print(f"Traversing directory: {directory['name']} (ID: {directory['id']})")
    for child_id in directory['childrenIds']:
        child = file_map[child_id]
        if not is_dir(child):
            downloads.append(download_file(child, output_dir))
            continue
        download_assets(file_map, child['id'], output_dir)

 if __name__ == "__main__":
    # Script entry point: parse the flags, resolve the directory to download,
    # start all downloads and wait for them to finish.
    parser = argparse.ArgumentParser(description='Download requested assets based on a JSON file that lists a directory tree.')
    parser.add_argument('--output-dir', type=Path, default=Path('downloads'), help='Output directory to save the files')
    parser.add_argument('--json-file', type=str, required=True, help='Path to the JSON file')
    parser.add_argument('--dir-name', type=str, help='Pass either this or --dir-id. Name of the directory to download')
    parser.add_argument('--dir-id', type=str, help="Pass either this or --dir-name. ID of the directory to download. To obtain the ID of the directory, open the browser developer tools, focus on the directory's icon and copy the value of its `data-chonky-file-id` attribute from the HTML. Note that the directory may consist of multiple nested divs, so make sure to look for the ID in all of them.")
    args = parser.parse_args()

    # Validate the selector flags before touching the file system. Exactly one
    # of them must be given; an empty string counts as not given.
    if bool(args.dir_name) == bool(args.dir_id):
        print("Error: Pass exactly one of `--dir-name` or `--dir-id`. See --help.")
        sys.exit(1)

    # Read as UTF-8 explicitly so non-ASCII names do not depend on the locale.
    with open(args.json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # parents=True lets a nested --output-dir (e.g. `a/b/c`) be created.
    args.output_dir.mkdir(parents=True, exist_ok=True)

    dir_id = args.dir_id or require_single_directory(data['fileMap'], args.dir_name)['id']
    download_assets(data['fileMap'], dir_id, args.output_dir)

    # Wait for every curl process and report the ones that exited non-zero
    # rather than ignoring their exit status.
    failed_count = sum(1 for download in downloads if download.wait() != 0)
    if failed_count:
        print(f"Error: {failed_count} download(s) failed.")
        sys.exit(1)
	#!/usr/bin/env python3

	'''
	Downloads requested assets based on a JSON file that lists a directory tree.

	USAGE:
	./download-deec-assets.py --json-file deec-assets.json --dir-name '[E2]Eletronica_2'
	# or
	./download-deec-assets.py --json-file deec-assets.json --dir-id '0fx80'

	To obtain the JSON file, watch requests with the browser web inspector when first opening
	https://my.deec.uc.pt/media/material. Make sure to open it right before running the script
	(otherwise the server responds to API requests with HTTP 504 for whatever reason).

	The JSON file is formatted as in this example (beginning of the file):
	```json
	{
	"fileMap": {
	"55964": {
	"id": "55964",
	"name": "Aulas",
	"modDate": "2024-02-27T15:35:56.898Z",
	"isDir": true,
	"childrenCount": 2,
	"childrenIds": [
	"55al4",
	"55al5"
	],
	"parentId": "f9qs"
	},
	"55974": {
	"id": "55974",
	"name": "testes",
	"modDate": "2024-02-27T15:39:59.170Z",
	"isDir": true,
	"childrenCount": 1,
	"childrenIds": [
	"55aog"
	],
	"parentId": "fcpv"
	},
	"55975": {
	"id": "55975",
	"name": "aulas",
	"modDate": "2024-02-27T15:40:10.882Z",
	"isDir": true,
	"childrenCount": 5,
	"childrenIds": [
	"55aob",
	"55aof",
	"55aoc",
	"55aod",
	"55aoe"
	],
	"parentId": "fcyg"
	},
	```

	Prompt this script was based on (for future reference):

	The script requires a `dir` arg, which is the name of a directory. It then parses
	for a single element in `fileMap` that has the same `name` value as the `dir` arg.
	Then, it recurses all the children of that element, based on the `childrenIds` property,
	and downloads each file that is not a directory, as indicated by `isDir` of each element.
	Files are downloaded with a `curl -LO https://my.deec.uc.pt/api/assets/explorer/<id>` command,
	where `<id>` is the `id` property of each element, started via `subprocess`. Files are
	downloaded concurrently.
	'''

	import json
	import sys
	import subprocess
	import argparse
	from pathlib import Path

	# Base URL of the asset endpoint; a file's ID is appended to it to form the download URL.
	ASSETS_URL = 'https://my.deec.uc.pt/api/assets/explorer'

	# Handles of the curl processes started so far; the script waits on each of them at the end.
	downloads = []

	def download_file(element, output_dir):
	    '''Start a background ``curl`` download of a single asset.

	    Args:
	        element: A ``fileMap`` entry for a file. Its ``id`` selects the remote
	            asset and its ``name`` is used as the local file name.
	        output_dir: Directory to save the file into. Must support the ``/``
	            path-join operator (the script passes a ``pathlib.Path``).

	    Returns:
	        The ``subprocess.Popen`` handle of the started ``curl`` process. The
	        download runs concurrently, so the caller has to ``wait()`` on it.
	    '''
	    url = f"{ASSETS_URL}/{element['id']}"
	    print(f"Downloading file: {element['name']} (ID: {element['id']})...")
	    destination = output_dir / element['name']
	    # --fail makes curl exit non-zero on an HTTP error response instead of
	    # silently saving the server's error page under the asset's file name.
	    return subprocess.Popen(["curl", "--fail", "-sSL", url, "-o", str(destination)])

	def require_single_directory(file_map, dir_name):
	    '''Return the only directory entry named ``dir_name``, or exit with an error.

	    Args:
	        file_map: Mapping of the JSON ``fileMap``; its values are entries with
	            at least a ``name`` key.
	        dir_name: Exact name of the directory to look for.

	    Returns:
	        The single matching entry whose ``name`` equals ``dir_name`` and that
	        is a directory.

	    Raises:
	        SystemExit: With status 1, after printing an explanation, when zero or
	            more than one directory has that name.
	    '''
	    matching_dirs = [
	        element for element in file_map.values()
	        if element['name'] == dir_name and is_dir(element)
	    ]
	    if len(matching_dirs) == 1:
	        return matching_dirs[0]
	    if not matching_dirs:
	        print(f"Error: Found no directory named '{dir_name}' in the file tree. Pass the exact name of the directory, or pass `--dir-id` instead. See --help.")
	    else:
	        print(f"Error: Found more than one directory named '{dir_name}' in the file tree. You may pass `--dir-id` instead. See --help.")
	    sys.exit(1)

	def is_dir(element):
	    '''Tell whether a ``fileMap`` entry is flagged as a directory.

	    Returns the entry's ``isDir`` value, or ``False`` when the key is absent.
	    '''
	    return 'isDir' in element and element['isDir']

	def download_assets(file_map, dir_id, output_dir):
	    '''Start downloads for every file beneath the directory with ID ``dir_id``.

	    Picks the first ``fileMap`` entry that has this ID and is a directory, then
	    walks its ``childrenIds``: sub-directories are traversed recursively, and
	    each file is passed to ``download_file``, whose process handle is appended
	    to the module-level ``downloads`` list. Does nothing when no directory
	    entry has that ID. ``output_dir`` is passed on unchanged at every level.
	    '''
	    for element in file_map.values():
	        if element['id'] == dir_id and is_dir(element):
	            print(f"Traversing directory: {element['name']} (ID: {element['id']})")
	            for child_id in element['childrenIds']:
	                child = file_map[child_id]
	                if is_dir(child):
	                    download_assets(file_map, child['id'], output_dir)
	                else:
	                    downloads.append(download_file(child, output_dir))
	            # Only the first matching entry is traversed.
	            break

	if __name__ == "__main__":
	    # Script entry point: parse the flags, resolve the directory to download,
	    # start all downloads and wait for them to finish.
	    parser = argparse.ArgumentParser(description='Download requested assets based on a JSON file that lists a directory tree.')
	    parser.add_argument('--output-dir', type=Path, default=Path('downloads'), help='Output directory to save the files')
	    parser.add_argument('--json-file', type=str, required=True, help='Path to the JSON file')
	    parser.add_argument('--dir-name', type=str, help='Pass either this or --dir-id. Name of the directory to download')
	    parser.add_argument('--dir-id', type=str, help="Pass either this or --dir-name. ID of the directory to download. To obtain the ID of the directory, open the browser developer tools, focus on the directory's icon and copy the value of its `data-chonky-file-id` attribute from the HTML. Note that the directory may consist of multiple nested divs, so make sure to look for the ID in all of them.")
	    args = parser.parse_args()

	    # Validate the selector flags before touching the file system. Exactly one
	    # of them must be given; an empty string counts as not given.
	    if bool(args.dir_name) == bool(args.dir_id):
	        print("Error: Pass exactly one of `--dir-name` or `--dir-id`. See --help.")
	        sys.exit(1)

	    # Read as UTF-8 explicitly so non-ASCII names do not depend on the locale.
	    with open(args.json_file, 'r', encoding='utf-8') as f:
	        data = json.load(f)

	    # parents=True lets a nested --output-dir (e.g. `a/b/c`) be created.
	    args.output_dir.mkdir(parents=True, exist_ok=True)

	    dir_id = args.dir_id or require_single_directory(data['fileMap'], args.dir_name)['id']
	    download_assets(data['fileMap'], dir_id, args.output_dir)

	    # Wait for every curl process and report the ones that exited non-zero
	    # rather than ignoring their exit status.
	    failed_count = sum(1 for download in downloads if download.wait() != 0)
	    if failed_count:
	        print(f"Error: {failed_count} download(s) failed.")
	        sys.exit(1)