-
-
Save Mistobaan/8e7db052d608374799c26671dd2a3cef to your computer and use it in GitHub Desktop.
Deploy Dolly v2.0 to SageMaker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "ee793bea-d5b6-46bf-acc7-a0107e8ed809", | |
"metadata": {}, | |
"source": [ | |
"# Deploy Dolly 2\n", | |
"\n", | |
"Resources:\n", | |
"* [Dolly 2](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm)\n", | |
"* [HuggingFace Model Card](https://huggingface.co/databricks/dolly-v2-12b)\n", | |
"* [Dolly GitHub Repo](https://github.com/databrickslabs/dolly#getting-started-with-response-generation)\n", | |
"* [Instructions For Deploying LLM on Sagemaker](https://www.philschmid.de/deploy-flan-ul2-sagemaker)\n", | |
"* [Simple deployment instructions for Dolly 2.0](https://github.com/databrickslabs/dolly/issues/83#issuecomment-1509362673)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "99bb63e0-ac91-40d5-8c19-9a1984cb8974", | |
"metadata": {}, | |
"source": [ | |
"### Install and import dependencies" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "872dba4c-faf7-4ded-933c-e8829453c266", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"%%capture\n", | |
"# Use the %pip magic (not !pip) so the packages are installed into the\n", | |
"# environment of the running kernel rather than whichever pip is first on PATH.\n", | |
"%pip install --upgrade sagemaker huggingface_hub hf-transfer" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "9f7b7d17-af87-41b6-b61a-40a2ee1fcf05", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import shutil\n", | |
"from distutils.dir_util import copy_tree\n", | |
"from pathlib import Path\n", | |
"\n", | |
"# Set HF cache location and ensure that hf-transfer is used for faster download\n", | |
"os.environ[\"HF_HOME\"] = \"data\"\n", | |
"os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"\n", | |
"\n", | |
"import sagemaker\n", | |
"from huggingface_hub import snapshot_download\n", | |
"from sagemaker.huggingface import HuggingFaceModel\n", | |
"from sagemaker.s3 import S3Uploader" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "10ae58db-d78a-4d5b-9f98-94e2d42cd59f", | |
"metadata": {}, | |
"source": [ | |
"### Get the IAM role" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "a12a6d2c-6ce3-4917-af9c-112c717a336e", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# SageMaker session and the IAM execution role used by the later cells\n", | |
"sess = sagemaker.Session()\n", | |
"role = sagemaker.get_execution_role()\n", | |
"\n", | |
"# Echo the resolved role, default bucket and region, one per line\n", | |
"print(\n", | |
"    f\"SageMaker role arn: {role}\",\n", | |
"    f\"SageMaker bucket: {sess.default_bucket()}\",\n", | |
"    f\"SageMaker session region: {sess.boto_region_name}\",\n", | |
"    sep=\"\\n\",\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8550df6a-f8fc-429c-8dfa-8951195d9f9d", | |
"metadata": {}, | |
"source": [ | |
"### Download the model files from the HuggingFace Hub" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "88cbc0e6-930b-4f2a-98f6-e7f7981c3f0a", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"model_id = \"databricks/dolly-v2-12b\"\n", | |
"\n", | |
"# Local staging directory, named after the last path component of the model id\n", | |
"model_tar_dir = Path(model_id.rsplit(\"/\", 1)[-1])\n", | |
"\n", | |
"# Start from an empty directory: drop whatever a previous run left behind\n", | |
"if model_tar_dir.exists():\n", | |
"    shutil.rmtree(model_tar_dir)\n", | |
"model_tar_dir.mkdir(exist_ok=True)\n", | |
"\n", | |
"# Download the model files from the Hugging Face Hub into model_tar_dir\n", | |
"snapshot_download(\n", | |
"    model_id,\n", | |
"    local_dir=str(model_tar_dir),\n", | |
"    local_dir_use_symlinks=False,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "eae839b4-30b1-4996-b891-1542382163c6", | |
"metadata": {}, | |
"source": [ | |
"### Define custom dependencies and ensure the model is loaded in 8-bit\n", | |
"\n", | |
"Note that the model does not need to be loaded in 8-bit if an instance with an A100 GPU is used." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "33a94d06-ad78-483f-a21d-16aac9d99c30", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Create the code/ sub-directory (and any missing parents) inside the model dir\n", | |
"(model_tar_dir / \"code\").mkdir(parents=True, exist_ok=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "fe975020-09c6-4f23-8c0e-bbeb50f23249", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"%%writefile $model_tar_dir/code/requirements.txt\n", | |
"accelerate==0.18.0\n", | |
"transformers==4.27.2\n", | |
"bitsandbytes==0.38.1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "ec738829-6248-4d90-9c83-8a0eb2fb92f0", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"%%writefile $model_tar_dir/code/inference.py\n", | |
"import torch\n", | |
"from transformers import pipeline\n", | |
"\n", | |
"\n", | |
"def model_fn(model_dir):\n", | |
"    \"\"\"Build and return the transformers pipeline for the model in ``model_dir``.\n", | |
"\n", | |
"    Args:\n", | |
"        model_dir: Location handed to ``pipeline`` as the model to load\n", | |
"            (presumably the directory where the serving stack unpacked\n", | |
"            model.tar.gz -- confirm against the inference toolkit docs).\n", | |
"\n", | |
"    Returns:\n", | |
"        The object created by ``transformers.pipeline``.\n", | |
"    \"\"\"\n", | |
"    pipeline_options = {\n", | |
"        \"model\": model_dir,\n", | |
"        \"torch_dtype\": torch.bfloat16,\n", | |
"        # lets the custom code shipped with the model files be executed\n", | |
"        \"trust_remote_code\": True,\n", | |
"        \"device_map\": \"auto\",\n", | |
"        # forwarded to the model loader: request 8-bit weights\n", | |
"        \"model_kwargs\": {\"load_in_8bit\": True},\n", | |
"    }\n", | |
"\n", | |
"    return pipeline(**pipeline_options)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3202a122-3151-4dc1-80b7-5451789704d4", | |
"metadata": {}, | |
"source": [ | |
"### Build model artifact and upload to S3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "5b7bb33a-fef2-4a5f-84ad-624af3338c00", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Remember where we started so the working directory can be restored afterwards\n", | |
"parent_dir = os.getcwd()\n", | |
"\n", | |
"# Run tar from inside the model dir so the archive members sit at the archive\n", | |
"# root instead of under a leading directory component\n", | |
"os.chdir(str(model_tar_dir))\n", | |
"try:\n", | |
"    # use pigz for faster and parallel compression; exclude an archive left over\n", | |
"    # from a previous run so it is not packed into the new one when re-running\n", | |
"    !tar -cf model.tar.gz --use-compress-program=pigz --exclude=model.tar.gz *\n", | |
"finally:\n", | |
"    # change back to parent dir even if the cell is interrupted\n", | |
"    os.chdir(parent_dir)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c1ed03df-617a-45ec-b136-4acff3484b2c", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Source archive on disk and destination prefix in the session's default bucket\n", | |
"archive_path = model_tar_dir.joinpath(\"model.tar.gz\")\n", | |
"s3_prefix = f\"s3://{sess.default_bucket()}/{model_tar_dir}\"\n", | |
"\n", | |
"# Upload the archive; the call returns the URI that is passed on as model_data later\n", | |
"s3_model_uri = S3Uploader.upload(local_path=str(archive_path), desired_s3_uri=s3_prefix)\n", | |
"\n", | |
"print(f\"Model uploaded to: {s3_model_uri}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "546670af-8d30-4e13-ba31-56fca933600e", | |
"metadata": {}, | |
"source": [ | |
"### Deploy the model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "fe7b5f1c-7c60-4916-acf2-ab24737cb99f", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Framework versions requested for the serving container, kept together so they\n", | |
"# are easy to bump in one place\n", | |
"container_versions = {\n", | |
"    \"transformers_version\": \"4.26\",\n", | |
"    \"pytorch_version\": \"1.13\",\n", | |
"    \"py_version\": \"py39\",\n", | |
"}\n", | |
"\n", | |
"# Model definition: the uploaded archive, the execution role and a single server worker\n", | |
"huggingface_model = HuggingFaceModel(\n", | |
"    model_data=s3_model_uri,\n", | |
"    role=role,\n", | |
"    model_server_workers=1,\n", | |
"    **container_versions,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "7446689f-f0be-4b62-8131-0e8f5c3b0196", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Endpoint sizing: a single ml.g5.4xlarge instance\n", | |
"deploy_options = {\n", | |
"    \"initial_instance_count\": 1,\n", | |
"    \"instance_type\": \"ml.g5.4xlarge\",\n", | |
"}\n", | |
"\n", | |
"# Deploy the model; the returned predictor is used to query it below\n", | |
"predictor = huggingface_model.deploy(**deploy_options)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "392caaed-a992-41c5-a4a6-9321f3b91589", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Instruction sent to the deployed model\n", | |
"prompt = \"Write me a tweet about the launch of Dolly 2.0, the new large language model from DataBricks trained on human-generated prompts.\"\n", | |
"\n", | |
"# Send the prompt under the \"inputs\" key\n", | |
"payload = {\"inputs\": prompt}\n", | |
"response = predictor.predict(payload)\n", | |
"\n", | |
"# Show the generated text of the first returned item as the cell output\n", | |
"first_result = response[0]\n", | |
"first_result[\"generated_text\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "dbe644d6-525e-4b9b-a267-2df4578f17e2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "conda_pytorch_p39", | |
"language": "python", | |
"name": "conda_pytorch_p39" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.15" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment