
@cpfiffer
Last active April 18, 2025 10:36
Using Outlines to get structured output from R1
import time
from typing import Literal
import outlines
import re
import torch
from transformers import AutoTokenizer
from outlines.fsm.json_schema import convert_json_schema_to_str
from outlines_core.fsm.json_schema import build_regex_from_schema
from pydantic import BaseModel
# Loading the model.
# model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
model_string = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B' # For smaller machines
# We'll use the transformers backend for this model, but you can use any other backend (e.g. vllm).
model = outlines.models.transformers(model_string, device='cuda')
tokenizer = AutoTokenizer.from_pretrained(model_string)
# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    answer: Literal['yes', 'no']
yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))
# Add the thinking prefix to the regex. The alternation admits any character
# stream that never contains a complete closing tag, so generation stays
# inside the think block until the model emits </think>.
thinking_regex = r'<think>([^<]|<[^\/]|<\/[^t]|<\/t[^h]|<\/th[^i]|<\/thi[^n]|<\/thin[^k]|<\/think[^>])*<\/think>\n'
result_regex = thinking_regex + yesno_regex
print(result_regex)
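# Illustrative shape of a completion the combined regex admits:
#   <think> ...free-form reasoning... </think>
#   {"answer": "yes"}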
# Apply the chat template
prompt = tokenizer.apply_chat_template(
    [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'Roses are red. Violets are blue. Are roses and violets the same color? Yes or no. Provide a chain of thought inside a <think> tag, closing with </think> when you are finished. After, please write JSON with the following schema: {"answer": "yes" | "no"}'},
    ],
    tokenize=False,
    add_generation_prompt=True,
)
# Build the constrained generator. Compiling the regex into an FSM can take a while, so we time it.
start_time = time.time()
generator = outlines.generate.regex(model, result_regex)
end_time = time.time()
print(f"Time taken to create generator: {end_time - start_time} seconds")
# Generate the response
result = generator(prompt, max_tokens=1000)
print(result)
# Parse out the thinking + structured result
# thinking_regex's repeated group only captures its last repetition, so use a
# non-greedy match to pull out the full chain of thought instead.
thinking_result = re.search(r'<think>(.*?)</think>', result, re.DOTALL).group(1).strip()
structured_result = re.search(yesno_regex, result).group(0).strip()
# Print the results: the chain of thought first, then the structured output.
print("Chain of thought")
print("----------------")
print(thinking_result)
print("\nStructured output")
print("----------------")
print(structured_result)
# Parse the structured result
output = YesNo.model_validate_json(structured_result)
print("\nPydantic output")
print("----------------")
print(output)
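# If you don't need the <think> prefix, Outlines can constrain output to the
# schema directly; a minimal sketch using its JSON generator API:
#   yesno_generator = outlines.generate.json(model, YesNo)
#   parsed = yesno_generator(prompt)  # returns a YesNo instance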
cpfiffer commented Feb 4, 2025

If you want to use this, you should only need to change your prompt and the structure:

# Set up response format you want the LLM to respond with.
class YesNo(BaseModel):
    answer: Literal['yes', 'no']

yesno_regex = build_regex_from_schema(convert_json_schema_to_str(YesNo))
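
For example, a three-way sentiment label would only swap the schema class (hypothetical Sentiment model shown below; the thinking regex and generator code are unchanged):

# Hypothetical alternative schema; the rest of the pipeline stays the same.
class Sentiment(BaseModel):
    label: Literal['positive', 'negative', 'neutral']

sentiment_regex = build_regex_from_schema(convert_json_schema_to_str(Sentiment))
result_regex = thinking_regex + sentiment_regex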
