@FrancescoJo
Created February 27, 2025 07:45
Run DeepSeek Models on my good old PC
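
A minimal FastAPI server that wraps a locally downloaded DeepSeek-R1-Distill-Llama-8B model with vLLM, followed by the pyproject.toml that pins its dependencies.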
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from vllm import LLM, SamplingParams

app = FastAPI()

model_path = "/home/deploy/workspace/DeepSeek-R1-Distill-Llama-8B"
# enforce_eager=True skips CUDA graph capture, saving GPU memory at some
# cost in speed -- a reasonable trade-off on an older card.
llm = LLM(
    model=model_path,
    gpu_memory_utilization=0.9,
    tensor_parallel_size=1,
    enforce_eager=True,
    max_model_len=16384,
)
sampling_params = SamplingParams(temperature=0.6, max_tokens=16384)

class QueryRequest(BaseModel):
    query: str

# A plain `def` endpoint: FastAPI runs it in a worker thread, so the
# blocking llm.generate() call does not stall the event loop.
@app.post("/generate/")
def generate_response(request: QueryRequest):
    try:
        response = llm.generate(request.query, sampling_params)
        result_text = response[0].outputs[0].text
        return {"response": result_text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
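
To try the endpoint, start the server with uvicorn, e.g. `uvicorn main:app --host 0.0.0.0 --port 8000` (assuming the script is saved as main.py; the gist does not show the filename), then POST a JSON body with a `query` field. A minimal stdlib client sketch, assuming the default port 8000:

# Quick smoke test against the running server.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:8000/generate/",
    data=json.dumps({"query": "Why is the sky blue?"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["response"])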
pyproject.toml:

# `package-mode = false` is a Poetry setting and belongs under [tool.poetry];
# as a bare top-level key it is not valid in pyproject.toml.
[tool.poetry]
package-mode = false

[project]
name = "hwan-deepseek-with-fastapi"
version = "0.1.0"
description = "Serve DeepSeek-R1-Distill-Llama-8B locally with vLLM and FastAPI"
requires-python = ">=3.12"
dependencies = [
    "uvicorn==0.34.0",
    "fastapi==0.115.8",
    "pydantic==2.10.6",
    "vllm==0.7.0",
]
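
With Poetry 2.x, which understands the standard [project] table alongside [tool.poetry], `poetry install` pulls the pinned dependencies; the server then starts with `uvicorn main:app` (again assuming the filename main.py).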