jondurbin / example.md
Last active April 23, 2025 17:36
Deploying an LLM

"easy" vllm endpoint

You can call this endpoint and it will automatically select the most recent vllm image:

curl -XPOST https://api.chutes.ai/chutes/vllm \
  -H 'content-type: application/json' \
  -H 'Authorization: cpk...' \
  -d '{
    "tagline": "Mistral 24b Instruct",
    "model": "unsloth/Mistral-Small-24B-Instruct-2501",
    "public": true
  }'
jondurbin / example.py
Created April 10, 2025 11:23
kimi-vl example
import os
import base64
import glob

import openai

client = openai.Client(base_url="https://llm.chutes.ai/v1", api_key=os.environ["CHUTES_API_KEY"])

# Base64-encode up to eight local images to attach to the request.
image_base64s = []
for path in glob.glob("/home/jdurbin/Downloads/logo*.png")[:8]:
    with open(path, "rb") as infile:
        image_base64s.append(base64.b64encode(infile.read()).decode())  # loop body assumed
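From there, the encoded images would typically be sent as data URIs in an OpenAI-style chat request. A sketch continuing the snippet above (the message format and model id are assumptions, not taken from the gist):

# Build one user message holding every image plus a text prompt.
content = [
    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}
    for b64 in image_base64s
]
content.append({"type": "text", "text": "Describe each of these logos."})
response = client.chat.completions.create(
    model="moonshotai/Kimi-VL-A3B-Instruct",  # model id is a placeholder
    messages=[{"role": "user", "content": content}],
)
print(response.choices[0].message.content)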
jondurbin / example.py
Created March 25, 2025 10:13
Qwen2.5-VL-32b-Instruct inference example
import os
import base64
import glob

import openai

client = openai.Client(base_url="https://llm.chutes.ai/v1", api_key=os.environ["CHUTES_API_KEY"])

# Base64-encode up to eight local images to attach to the request.
image_base64s = []
for path in glob.glob("/home/jdurbin/Downloads/logo*.png")[:8]:
    with open(path, "rb") as infile:
        image_base64s.append(base64.b64encode(infile.read()).decode())  # loop body assumed
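As with the kimi-vl example, the images would go into an OpenAI-style chat request; a sketch continuing the snippet above, this time streaming the reply (message format assumed, model id taken from the gist title):

content = [
    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}
    for b64 in image_base64s
]
content.append({"type": "text", "text": "Compare these logos."})
stream = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-32B-Instruct",
    messages=[{"role": "user", "content": content}],
    stream=True,
)
for chunk in stream:
    # Print tokens as they arrive.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)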
jondurbin / spark_example.py
Last active March 18, 2025 10:37
Inference example with Spark-TTS on chutes
import base64

import requests

# Base64-encode a short reference clip used for voice cloning.
audio = base64.b64encode(open("test.wav", "rb").read()).decode()
result = requests.post(
    "https://chutes-spark-tts.chutes.ai/speak",
    json={
        "text": "How much wood would a woodchuck chuck if a woodchuck could chuck wood?",
        "sample_audio_b64": audio,
    },
)
jondurbin / csm1b_example.py
Created March 18, 2025 09:20
Example inference with csm-1b on chutes
import base64

import requests

# Reference audio supplying voice context for csm-1b.
audio = base64.b64encode(open("test.wav", "rb").read()).decode()
result = requests.post(
    "https://chutes-csm-1b.chutes.ai/speak",
    json={
        "speaker": 1,
        "context": [
            # prior conversation turns; the entry format here is an assumption
        ],
        "text": "How much wood would a woodchuck chuck?",  # prompt field assumed
    },
)
jondurbin / dolphin.txt
Last active March 16, 2025 07:24
Who is dolphin?
{
  "id": "27ab0d1289814bb28c7c30e38a98df8d",
  "object": "chat.completion",
  "created": 1742109451,
  "model": "cognitivecomputations/Dolphin3.0-Mistral-24B",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
jondurbin / chutes-walkthrough.md
Created December 8, 2024 12:31
chutes quickstart
1. Install chutes (and bittensor if you don't already have a coldkey/hotkey):

       python3 -m venv chutes-venv
       source chutes-venv/bin/activate
       pip install chutes 'bittensor<8'

2. If you don't already have a coldkey/hotkey, create one (replace chutes/chuteshk with your desired coldkey/hotkey names), as sketched below.
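A minimal sketch of that step, assuming the standard btcli wallet commands from bittensor (the exact invocation is an assumption, not taken from the gist):

       # Create a coldkey, then a hotkey under it (names are placeholders).
       btcli wallet new_coldkey --wallet.name chutes
       btcli wallet new_hotkey --wallet.name chutes --wallet.hotkey chuteshk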
jondurbin / check_copyright.py
Last active July 12, 2024 00:41
Copyright check
jondurbin / create_tokenizer.py
Last active December 28, 2023 17:31
AR examples
import re
import gc
import os
import glob
import json
from copy import deepcopy
from datasets import concatenate_datasets, Dataset
from transformers import AutoTokenizer
from huggingface_hub import snapshot_download
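Only the imports are shown above; as a hedged sketch of the rest, here is one way a tokenizer-creation script along these lines could retrain a fast tokenizer on a local corpus with transformers' train_new_from_iterator (the corpus paths, base model, and vocab size are all placeholders, not the gist's actual values):

import glob
import json

from transformers import AutoTokenizer

def iter_corpus(paths, batch_size=1000):
    # Yield batches of text from JSONL files; a "text" field is assumed.
    batch = []
    for path in paths:
        with open(path) as infile:
            for line in infile:
                batch.append(json.loads(line)["text"])
                if len(batch) >= batch_size:
                    yield batch
                    batch = []
    if batch:
        yield batch

# Retrain the base tokenizer's algorithm on the new corpus (base model is a placeholder).
base = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer = base.train_new_from_iterator(iter_corpus(glob.glob("corpus/*.jsonl")), vocab_size=32000)
tokenizer.save_pretrained("new-tokenizer")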
jondurbin / airoboros-m-7b-3.1.2.md
Last active October 21, 2023 12:52
airoboros-m-7b-3.1.2.md

Trained on 10x A6000 GPUs on runpod.io.

I ran many fine-tunes, including multiple full fine-tunes, fp16 LoRAs, and QLoRAs; the QLoRA below performed best in my testing.

dataset: https://hf.co/datasets/jondurbin/airoboros-3.1 (plus a few unpublished de-censoring instructions)

training script: https://github.com/jondurbin/qlora, specifically commit 8cd269bf9bd7753c92164934269019e12f23314f

export BASE_DIR=/workspace