Recursive knowledge concept map using Outlines
# Setup instructions:
#   pip install 'outlines[transformers]'
import outlines
from transformers import AutoTokenizer
import json

# MODEL_STRING = "HuggingFaceTB/SmolLM2-135M-Instruct"  # Small model
# MODEL_STRING = "HuggingFaceTB/SmolLM2-1.7B-Instruct"  # Larger but kind of boring
MODEL_STRING = "NousResearch/Hermes-3-Llama-3.1-8B"
schema = """
{
    "type": "object",
    "properties": {
        "concept": {
            "type": "string",
            "description": "The main concept or topic"
        },
        "definition": {
            "type": "string",
            "description": "Brief definition of the concept"
        },
        "related_concepts": {
            "type": "array",
            "items": {
                "$ref": "#"
            },
            "maxItems": 3,
            "description": "Related sub-concepts that help explain the main concept"
        }
    },
    "required": ["concept", "definition"]
}
"""
llm = outlines.models.transformers(MODEL_STRING)
tokenizer = AutoTokenizer.from_pretrained(MODEL_STRING)
generator = outlines.generate.json(llm, schema)

system_prompt = """
You provide concept maps.
"""

prompt = """
Decompose the concept of "architecture" into a concept map -- each concept should have a name and definition,
and then a list of related concepts. Related concepts are optional, but they should be sub-concepts of their
parents.
"""
formatted_prompt = tokenizer.apply_chat_template(
    [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ],
    tokenize=False,
    add_generation_prompt=True,
)

result = generator(formatted_prompt, max_tokens=1000)
print(json.dumps(result, indent=2))
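
# The concept map is recursive, so a small walker can render it as an
# indented tree. A minimal sketch: `print_concept_tree` is a hypothetical
# helper, assuming `result` is a plain dict matching the schema above
# (which the json.dumps call implies).
def print_concept_tree(node, depth=0):
    indent = "  " * depth
    print(f"{indent}{node['concept']}: {node['definition']}")
    for child in node.get("related_concepts", []):
        print_concept_tree(child, depth + 1)

print_concept_tree(result)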