Created
February 27, 2025 07:45
-
-
Save FrancescoJo/7ae6141bd04eb4e7c5fc691b18fa5d21 to your computer and use it in GitHub Desktop.
Run DeepSeek Models on my good old PC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastapi import FastAPI, HTTPException | |
from pydantic import BaseModel | |
from vllm import LLM, SamplingParams | |
# ASGI application object; route handlers below register themselves on it.
app = FastAPI()

# Filesystem path handed to vLLM as the model location.
model_path = "/home/deploy/workspace/DeepSeek-R1-Distill-Llama-8B"

# The engine is constructed once at import time so every request reuses the
# same instance instead of reloading the model per call.
llm = LLM(
    model=model_path,
    gpu_memory_utilization=0.9,
    tensor_parallel_size=1,
    enforce_eager=True,
    max_model_len=16384,
)

# Shared sampling configuration applied to every generation request.
sampling_params = SamplingParams(
    temperature=0.6,
    max_tokens=16384,
)
class QueryRequest(BaseModel):
    """Request body schema for the ``/generate/`` endpoint."""

    # Prompt text that the endpoint passes to the model.
    query: str
@app.post("/generate/")
async def generate_response(request: QueryRequest):
    """Generate a completion for ``request.query`` and return it as JSON.

    Args:
        request: Parsed request body; its ``query`` string is passed to the
            model as the prompt.

    Returns:
        A dict of the form ``{"response": <generated text>}`` holding the
        text of the first output produced for the prompt.

    Raises:
        HTTPException: With status 500 and the stringified error as the
            detail if generation or result extraction fails.
    """
    try:
        # NOTE(review): llm.generate is a synchronous call inside an async
        # handler, so it appears to occupy the event loop for the whole
        # generation — confirm whether serializing requests this way is
        # intended before moving it to a worker thread.
        response = llm.generate(request.query, sampling_params)
        result_text = response[0].outputs[0].text
    except Exception as e:
        # Chain the original error so it stays attached as the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"response": result_text}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# package-mode is a Poetry option; Poetry reads it from the [tool.poetry]
# table, so it is declared there rather than as a bare top-level key.
# NOTE(review): if this project is managed with a different tool, confirm the
# equivalent "non-package" setting for that tool instead.
[tool.poetry]
package-mode = false

[project]
name = "hwan-deepseek-with-fastapi"
version = "0.1.0"
description = "Add your description here"
requires-python = ">=3.12"
dependencies = [
    "uvicorn==0.34.0",
    "fastapi==0.115.8",
    "pydantic==2.10.6",
    "vllm==0.7.0",
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment