
@cpfiffer
Last active April 18, 2025 10:36
Using Outlines to get structured output from R1
import time
from typing import Literal
import outlines
import re
import torch
from transformers import AutoTokenizer
from outlines.fsm.json_schema import convert_json_schema_to_str
from outlines_core.fsm.json_schema import build_regex_from_schema
from pydantic import BaseModel
# Loading the model.
# model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B' # For smaller machines
# We'll use the transformers backend for this model, but you can use any other backend (e.g. vllm).
model = outlines.models.transformers(model_string, device='cuda')
tokenizer = AutoTokenizer.from_pretrained(model_string)
# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    answer: Literal['yes', 'no']
yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))
# Add the thinking prefix to the regex. The alternation admits any character
# stream that never contains a complete closing tag, so generation stays
# inside the think block until the model emits </think>.
thinking_regex = r'<think>([^<]|<[^\/]|<\/[^t]|<\/t[^h]|<\/th[^i]|<\/thi[^n]|<\/thin[^k]|<\/think[^>])*<\/think>\n'
result_regex = thinking_regex + yesno_regex
print(result_regex)
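# Illustrative shape of a completion the combined regex admits:
#   <think> ...free-form reasoning... </think>
#   {"answer": "yes"}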
# Apply the chat template
prompt = tokenizer.apply_chat_template(
    [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'Roses are red. Violets are blue. Are roses and violets the same color? Yes or no. Provide a chain of thought inside a <think> tag, closing with </think> when you are finished. After, please write JSON with the following schema: {"answer": "yes" | "no"}'},
    ],
    tokenize=False,
    add_generation_prompt=True,
)
# Build the constrained generator. Compiling the regex into an FSM can take a while, so we time it.
start_time = time.time()
generator = outlines.generate.regex(model, result_regex)
end_time = time.time()
print(f"Time taken to create generator: {end_time - start_time} seconds")
# Generate the response
result = generator(prompt, max_tokens=1000)
print(result)
# Parse out the thinking + structured result
# thinking_regex's repeated group only captures its last repetition, so use a
# non-greedy match to pull out the full chain of thought instead.
thinking_result = re.search(r'<think>(.*?)</think>', result, re.DOTALL).group(1).strip()
structured_result = re.search(yesno_regex, result).group(0).strip()
# Print the results: the chain of thought first, then the structured output.
print("Chain of thought")
print("----------------")
print(thinking_result)
print("\nStructured output")
print("----------------")
print(structured_result)
# Parse the structured result
output = YesNo.model_validate_json(structured_result)
print("\nPydantic output")
print("----------------")
print(output)
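# If you don't need the <think> prefix, Outlines can constrain output to the
# schema directly; a minimal sketch using its JSON generator API:
#   yesno_generator = outlines.generate.json(model, YesNo)
#   parsed = yesno_generator(prompt)  # returns a YesNo instance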
cpfiffer commented Feb 4, 2025

If you want to use this, you should only need to change your prompt and the structure:

# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    answer: Literal['yes', 'no']

yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))
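
For example, a three-way sentiment label would only swap the schema class (hypothetical Sentiment model shown below; the thinking regex and generator code are unchanged):

# Hypothetical alternative schema; the rest of the pipeline stays the same.
class Sentiment(BaseModel):
    label: Literal['positive', 'negative', 'neutral']

sentiment_regex = build_regex_from_schema(convert_json_schema_to_str(Sentiment))
result_regex = thinking_regex + sentiment_regex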
