Last active
July 27, 2023 06:51
-
-
Save mcapodici/eaa39861affe75be13badefbe9e05079 to your computer and use it in GitHub Desktop.
modal local code runner script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import modal | |
import os | |
# Script to run your local code | |
# | |
# * Requires python package modal-client install, and you have obtained a token, e.g.: | |
# ``` | |
# pip install modal-client | |
# modal token new | |
# ``` | |
# * You need to set up the parameters before running the script | |
# | |
# Example usage: | |
# | |
# ``` | |
# modal run lob.py --command prepare | |
# modal run lob.py --command train | |
# modal run lob.py --command sample | |
# ```
# | |
# Note: There is a persistent volume at /volume for downloading large models, saving model checkpoints etc. to. | |
# | |
# How to use on nanoGPT: | |
# ``` | |
# git clone https://github.com/karpathy/nanoGPT | |
# cd nanoGPT | |
# mkdir src | |
# mv *.py data config src | |
# ``` | |
# | |
import os | |
import modal | |
# Parameters for run
# =========================================================================================
# Choose one of: "t4", "a10g", "inf2", "a100-20g", "a100" or None
gpu="t4"
# Map of entrypoint command name -> list of shell commands to run remotely.
# (These defaults target karpathy/nanoGPT — see the usage notes above.)
commands={
    'prepare': ['python data/shakespeare_char/prepare.py'],
    'train': ['python train.py config/train_shakespeare_char.py'],
    'sample': ['python sample.py --out_dir=out-shakespeare-char'],
}
# When True, file_condition prints every file that gets mounted.
verbose=True
# Prefix for the persistent volume name; the cloud name is appended below.
volume_name_prefix="2023-07-27-10-45"
# Remote function timeout, in minutes.
timeout_mins=60
# Local paths (relative, "./"-prefixed) excluded from the mount.
exclude_paths_starting_with=["./.git", "./.github", "./bin", "./lib", "./share"]
# Container image: Debian slim + rsync (for the copy step) + ML python deps.
image = modal.Image \
    .debian_slim() \
    .apt_install("rsync") \
    .pip_install("torch numpy transformers datasets tiktoken wandb tqdm".split(" "))
# =========================================================================================
# End parameters for run
# a100-class GPUs are provisioned on GCP, everything else on AWS; a separate
# volume per cloud avoids cross-cloud slowness/egress.
cloud="gcp" if gpu and gpu.startswith("a100") else "aws"
volume_name = f"{volume_name_prefix}-{cloud}"
print(f"💾 using volume name: {volume_name}")
volume = modal.NetworkFileSystem.new().persisted(volume_name)
stub = modal.Stub("lob-run", image = image)
def file_condition(path: str):
    """Mount filter: decide whether a local file is uploaded to the container.

    Returns False for any path under one of the configured
    ``exclude_paths_starting_with`` prefixes, True otherwise.  When
    ``verbose`` is set, each included file is printed (prefixed with its
    size when it is 1 Mb or larger).
    """
    if any(path.startswith(prefix) for prefix in exclude_paths_starting_with):
        return False
    size_mb = round(os.stat(path).st_size / (1024 * 1024), 2)
    size_note = f"({size_mb}Mb) " if size_mb >= 1 else ""
    if verbose:
        print(f"{size_note} {path}")
    return True
@stub.local_entrypoint()
def run(command):
    """Local entrypoint: validate *command* and execute it remotely via ``copy``.

    Args:
        command: One of the keys of the module-level ``commands`` dict
            (e.g. ``prepare``, ``train``, ``sample``), supplied on the
            command line as ``modal run lob.py --command <name>``.

    Exits with status 1 (message on stderr) when the command is unknown.
    """
    # `command not in commands` is the idiomatic membership test (no .keys()).
    if command not in commands:
        possible_commands = ", ".join(commands.keys())
        print(f"Command not recognised. Possible commands: {possible_commands}", file=sys.stderr)
        # sys.exit, not the interactive `exit` builtin, is correct in scripts.
        sys.exit(1)
    command_text = commands[command]
    print(f'Command {command} was chosen.')
    print(f'This will run: {command_text}')
    # .call() runs the decorated function remotely (modal 0.x API).
    copy.call(command_text)
@stub.function(
    cloud=cloud,
    gpu=gpu,
    timeout=timeout_mins*60,
    mounts=[modal.Mount.from_local_dir(".", remote_path="/source/code", condition=file_condition)],
    network_file_systems={"/root/code": volume})
def copy(commands: "list[str]"):
    """Remote function: sync mounted sources onto the persistent volume, then run commands.

    The local directory is mounted read-only at ``/source/code``; rsync copies
    it into ``/root/code`` (the persistent network file system) so that
    checkpoints and downloads written next to the code survive between runs.

    Args:
        commands: Shell commands to execute, in order, from ``/root/code``.
    """
    source = "/source/code/"
    dest = "/root/code/"
    print("📁 Running rsync to copy files up to container:")
    # rsync flags:
    #   -r  recurse into directories
    #   -u  skip files that are newer on the receiver (preserves remote edits)
    #   -l  copy symlinks as symlinks
    #   --copy-unsafe-links  only "unsafe" symlinks are transformed
    #   -p  preserve permissions
    #   -t  preserve modification times
    #   --progress  show progress during transfer
    os.system(f"rsync -r -u -l --copy-unsafe-links -p -t --progress {source} {dest}")
    print("🐍 Using remote python version:")
    # Plain string: there was nothing to interpolate here.
    os.system("python --version")
    # chdir to the absolute destination; the previous relative chdir("code")
    # only worked because the container happens to start in /root.
    os.chdir(dest)
    for command in commands:
        print(f"🏃🏽Executing command: {command}")
        os.system(command)
# # Parameters for run | |
# # ========================================================================================= | |
# # Choose one of: "t4", "a10g", "inf2", "a100-20g", "a100" or None | |
# gpu=None | |
# volume_name_prefix = "temp20230719" | |
# timeout_minutes = 10 | |
# # Set up your image here: | |
# image = modal.Image \ | |
# .debian_slim() \ | |
# .pip_install("torch numpy transformers datasets tiktoken wandb tqdm".split(" ")) | |
# # ========================================================================================= | |
# # End parameters for run | |
# # To avoid slowness/egress, make a separate volume for aws instances vs. gcp
# cloud="gcp" if gpu and gpu.startswith("a100") else "aws" | |
# volume_name = f"{volume_name_prefix}-{cloud}" | |
# print(f"💾 using volume name: {volume_name}") | |
# volume = modal.NetworkFileSystem.new().persisted(volume_name) | |
# stub = modal.Stub("lob-run", | |
# image=image | |
# ) | |
# @stub.local_entrypoint() | |
# def main(command: str): | |
# run_command.call(command) | |
# @stub.function( | |
# cloud=cloud, | |
# gpu=gpu, | |
# timeout=timeout_minutes * 60, | |
# mounts=[modal.Mount.from_local_dir("./src", remote_path="/src")], | |
# network_file_systems={"/volume": volume}) | |
# def run_command(command: str): | |
# os.chdir("/src") | |
# os.system(command) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment