You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
<|begin▁of▁sentence|><|User|>A man has 53 socks in his drawer: 21 identical blue, 15 identical black and 17 identical red. The lights are out, and he is completely in the dark. How many socks must he take out to make 100 percent certain he has at least one pair of black socks?<|Assistant|>
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
try:
role = sagemaker.get_execution_role()
except ValueError:
iam = boto3.client('iam')
role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
# Hub Model configuration. https://huggingface.co/models
hub = {
'HF_MODEL_ID':'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
'SM_NUM_GPUS': json.dumps(1)
}
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
image_uri=get_huggingface_llm_image_uri("huggingface",version="3.0.1"),
env=hub,
role=role,
)
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
initial_instance_count=1,
instance_type="ml.g5.2xlarge",
container_startup_health_check_timeout=300,
)
# send request
predictor.predict({
"inputs": "Hi, what can you help me with?",
})
AWS Inferentia2
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
try:
role = sagemaker.get_execution_role()
except ValueError:
iam = boto3.client("iam")
role = iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]
# Hub Model configuration. https://huggingface.co/models
hub = {
"HF_MODEL_ID": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"HF_NUM_CORES": "2",
"HF_AUTO_CAST_TYPE": "bf16",
"MAX_BATCH_SIZE": "8",
"MAX_INPUT_TOKENS": "3686",
"MAX_TOTAL_TOKENS": "4096",
}
region = boto3.Session().region_name
image_uri = f"763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.2-optimum0.0.27-neuronx-py310-ubuntu22.04"
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
image_uri=image_uri,
env=hub,
role=role,
)
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
initial_instance_count=1,
instance_type="ml.inf2.xlarge",
container_startup_health_check_timeout=1800,
volume_size=512,
)
# send request
predictor.predict(
{
"inputs": "What is is the capital of France?",
"parameters": {
"do_sample": True,
"max_new_tokens": 128,
"temperature": 0.7,
"top_k": 50,
"top_p": 0.95,
}
}
)