@smellslikeml
Last active March 11, 2024 18:27

Revisions

  1. smellslikeml revised this gist Mar 11, 2024. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions llm_worker.py
    @@ -1,6 +1,5 @@
      # Launch nats-server
    - # Download weights from https://huggingface.co/remyxai/stablelm-zephyr-3B_localmentor/blob/main/ggml-model-q4_0.gguf
    - # to stablelm-localmentor.gguf
    + # wget https://huggingface.co/remyxai/stablelm-zephyr-3B_localmentor/resolve/main/ggml-model-q4_0.gguf -O stablelm-localmentor.gguf
      import nats
      import asyncio
      from llama_cpp import Llama
  2. smellslikeml revised this gist Mar 11, 2024. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions llm_worker.py
    @@ -1,3 +1,6 @@
    + # Launch nats-server
    + # Download weights from https://huggingface.co/remyxai/stablelm-zephyr-3B_localmentor/blob/main/ggml-model-q4_0.gguf
    + # to stablelm-localmentor.gguf
      import nats
      import asyncio
      from llama_cpp import Llama
  3. smellslikeml revised this gist Mar 11, 2024. No changes.
  4. smellslikeml created this gist Mar 11, 2024.
    25 changes: 25 additions & 0 deletions llm_worker.py
    @@ -0,0 +1,25 @@
    + import nats
    + import asyncio
    + from llama_cpp import Llama
    +
    +
    + async def llm_runner(nats_url, model_path, subject):
    +     nc = await nats.connect(nats_url)
    +     llm = Llama(model_path)
    +
    +     async def inference_handler(msg):
    +         data = msg.data.decode()
    +         response = llm(data, max_tokens=2048, stop=["###", "\n\n"], echo=True)
    +         r = response["choices"][0]["text"]
    +         await nc.publish(msg.reply, str(r).encode())
    +
    +     await nc.subscribe(subject, cb=inference_handler)
    +     await asyncio.Future()
    +
    +
    + if __name__ == "__main__":
    +     asyncio.run(
    +         llm_runner(
    +             "nats://localhost:4222", "stablelm-localmentor.gguf", "inference.requests"
    +         )
    +     )
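
The worker answers over NATS request/reply: each request carries a reply inbox, and the handler publishes the completion back to msg.reply. Note the Llama call itself is synchronous, so a single worker serves one request at a time. Below is a minimal client sketch, assuming nats-server and the final revision of llm_worker.py are running locally; the prompt text and the 60-second timeout are illustrative choices, not part of the gist.

    # llm_client.py: send a prompt and wait for the worker's reply
    import nats
    import asyncio


    async def main():
        nc = await nats.connect("nats://localhost:4222")
        # nc.request() publishes on the subject the worker subscribed to
        # and awaits the message the worker publishes to msg.reply.
        resp = await nc.request(
            "inference.requests",
            "What's a good tip for startup founders?".encode(),
            timeout=60,
        )
        print(resp.data.decode())
        await nc.close()


    if __name__ == "__main__":
        asyncio.run(main())

To scale out, several copies of the worker could subscribe with a NATS queue group (the queue argument to nc.subscribe) so requests are load-balanced across them.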