Recursive knowledge concept map using Outlines
# Setup instructions:
#   pip install 'outlines[transformers]'
import outlines
from transformers import AutoTokenizer
import json

# MODEL_STRING = "HuggingFaceTB/SmolLM2-135M-Instruct"  # Small model
# MODEL_STRING = "HuggingFaceTB/SmolLM2-1.7B-Instruct"  # Larger but kind of boring
MODEL_STRING = "NousResearch/Hermes-3-Llama-3.1-8B"
schema = """
{
    "type": "object",
    "properties": {
        "concept": {
            "type": "string",
            "description": "The main concept or topic"
        },
        "definition": {
            "type": "string",
            "description": "Brief definition of the concept"
        },
        "related_concepts": {
            "type": "array",
            "items": {
                "$ref": "#"
            },
            "maxItems": 3,
            "description": "Related sub-concepts that help explain the main concept"
        }
    },
    "required": ["concept", "definition"]
}
"""
llm = outlines.models.transformers(MODEL_STRING)
tokenizer = AutoTokenizer.from_pretrained(MODEL_STRING)
generator = outlines.generate.json(llm, schema)

system_prompt = """
You provide concept maps.
"""

prompt = """
Decompose the concept of "architecture" into a concept map -- each concept should have a name and definition,
and then a list of related concepts. Related concepts are optional, but they should be sub-concepts of their
parents.
"""
formatted_prompt = tokenizer.apply_chat_template(
    [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ],
    tokenize=False,
    add_generation_prompt=True,
)

result = generator(formatted_prompt, max_tokens=1000)
print(json.dumps(result, indent=2))
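
# The concept map is recursive, so a small walker can render it as an
# indented tree. A minimal sketch: `print_concept_tree` is a hypothetical
# helper, assuming `result` is a plain dict matching the schema above
# (which the json.dumps call implies).
def print_concept_tree(node, depth=0):
    indent = "  " * depth
    print(f"{indent}{node['concept']}: {node['definition']}")
    for child in node.get("related_concepts", []):
        print_concept_tree(child, depth + 1)

print_concept_tree(result)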