Skip to content

Instantly share code, notes, and snippets.

@sebington
Last active December 18, 2025 20:47
Show Gist options
  • Select an option

  • Save sebington/f0b3147f0d0b6a60a616a3e6bfe44ffd to your computer and use it in GitHub Desktop.

Select an option

Save sebington/f0b3147f0d0b6a60a616a3e6bfe44ffd to your computer and use it in GitHub Desktop.
This is a slightly modified version of the DiVeRSe (Diverse Verifier on Reasoning Steps) technique using LLMs with Mirascope. DiVeRSe is a prompt engineering method that enhances an LLM's reasoning capabilities by generating multiple reasoning chains from variations of the original prompt. All steps of the script are printed on screen.
# from https://mirascope.com/docs/mirascope/guides/prompt-engineering/chaining-based/diverse
# adapted for Groq by Claude (web version)
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "mirascope[groq]",
# "pydantic>=2.0",
# ]
# ///
import asyncio
from mirascope.core import groq, prompt_template
from pydantic import BaseModel, Field
class PromptVariations(BaseModel):
    """Structured output model: alternative phrasings of the original prompt."""

    # Filled in by the LLM via Mirascope's `response_model` structured extraction.
    variations: list[str] = Field(..., description="Variations of the original prompt")
@groq.call(model="llama-3.3-70b-versatile", response_model=PromptVariations)
@prompt_template(
"""
Return the {num_prompts} alternate variations of the prompt which retain the
full meaning but uses different phrasing.
Prompt: {prompt}
"""
)
def get_prompt_variations(prompt: str, num_prompts: int): ...
@groq.call(model="llama-3.3-70b-versatile")
@prompt_template(
"""
Answer the following question going step by step:
{query}
"""
)
async def zero_shot_cot(query: str): ...
class ResponseDetails(BaseModel):
    """Verifier output for one reasoning chain: extracted answer + confidence."""

    # The numeric final answer the chain arrived at; used as the voting key.
    solution_number: int = Field(
        ..., description="The actual number given as the answer in a solution."
    )
    # Model-estimated probability the answer is correct, constrained to [0, 1].
    correctness_probability: float = Field(
        ...,
        ge=0,
        le=1,
        description="An estimated probability that the given solution is correct from 0.0 to 1.0",
    )
@groq.call(model="llama-3.3-70b-versatile", response_model=ResponseDetails)
@prompt_template(
"""
Here is a query and a response which attempts to answer the query.
Prompt: {query}
Response: {response}
Extract the raw numerical value of the answer given by the response, and also
give an estimate between 0.0 and 1.0 of the probability that this solution
is correct.
"""
)
async def evaluate_response(query: str, response: str): ...
async def diverse(query: str, num_variations: int) -> int:
    """Run the DiVeRSe pipeline: vary the prompt, reason, verify, and vote.

    Steps (all printed to screen):
      1. Generate ``num_variations - 1`` rephrasings of ``query`` (plus the
         original, giving ``num_variations`` prompts total).
      2. Produce one zero-shot CoT reasoning chain per prompt, concurrently.
      3. Have the verifier extract each chain's numeric answer and a
         correctness probability.
      4. Sum probabilities per distinct answer and return the top-scoring one.

    Args:
        query: The question to answer.
        num_variations: Total number of prompts (original + rephrasings).

    Returns:
        The numeric answer (``solution_number``) with the highest aggregated
        correctness score.
    """
    print("=" * 80)
    print("STEP 1: Generating prompt variations")
    print("=" * 80)
    # Gather the variations of the prompt; the original query counts as one,
    # hence num_variations - 1 rephrasings are requested.
    alternate_variations = get_prompt_variations(query, num_variations - 1)
    all_variations = alternate_variations.variations + [query]
    print(f"\nOriginal query:\n{query}\n")
    print(f"Generated {len(alternate_variations.variations)} variations:\n")
    for i, variation in enumerate(alternate_variations.variations, 1):
        print(f"{i}. {variation}\n")
    print("=" * 80)
    print("STEP 2: Generating reasoning chains with Chain-of-Thought")
    print("=" * 80)
    # Generate a unique reasoning chain for each prompt variation with CoT;
    # all LLM calls run concurrently.
    cot_tasks = [zero_shot_cot(prompt) for prompt in all_variations]
    cot_responses = [response.content for response in await asyncio.gather(*cot_tasks)]
    for i, (variation, cot_response) in enumerate(zip(all_variations, cot_responses), 1):
        print(f"\n--- Reasoning Chain {i} ---")
        print(f"Prompt variation: {variation[:100]}...")
        print(f"Response:\n{cot_response}\n")
    print("=" * 80)
    print("STEP 3: Evaluating each reasoning chain")
    print("=" * 80)
    # Evaluate each reasoning chain against the ORIGINAL query (not the
    # variation that produced it), also concurrently.
    eval_tasks = [
        evaluate_response(query, cot_response) for cot_response in cot_responses
    ]
    eval_responses = await asyncio.gather(*eval_tasks)
    # Weighted voting: sum correctness probabilities per distinct answer.
    response_scores: dict[int, float] = {}
    for i, eval_response in enumerate(eval_responses, 1):
        print(f"\n--- Evaluation {i} ---")
        print(f"Solution number: {eval_response.solution_number}")
        print(f"Correctness probability: {eval_response.correctness_probability:.2f}")
        response_scores[eval_response.solution_number] = (
            response_scores.get(eval_response.solution_number, 0)
            + eval_response.correctness_probability
        )
    print("\n" + "=" * 80)
    print("STEP 4: Aggregating scores and selecting best answer")
    print("=" * 80)
    print("\nAggregated scores:")
    for solution, score in sorted(response_scores.items(), key=lambda x: x[1], reverse=True):
        print(f"Solution {solution}: Total score = {score:.2f}")
    # NOTE: best_response is an int (solution_number); the original annotated
    # this function as -> str, which was incorrect.
    best_response = max(response_scores, key=response_scores.get)
    print(f"\n{'=' * 80}")
    print(f"FINAL ANSWER: {best_response}")
    print(f"{'=' * 80}\n")
    return best_response
async def run_diverse(prompt: str, num_variations: int = 3) -> int:
    """Convenience wrapper around diverse() with a default variation count.

    Args:
        prompt: The question to answer.
        num_variations: Total prompts to reason over (default 3).

    Returns:
        The winning numeric answer from the DiVeRSe pipeline. (The original
        ``-> str`` annotation was wrong: diverse() yields an int answer.)
    """
    return await diverse(prompt, num_variations)
async def main() -> None:
    """Run DiVeRSe on a sample committee-counting word problem."""
    query = """
A committee of 3 people must be formed from a pool of 6 people, but Amy and Bob do not
get along and should not be on the committee at the same time. How many viable
combinations are there?
"""
    # run_diverse prints every intermediate step and the final answer itself,
    # so the return value is not needed here (the original bound it to an
    # unused local `result`).
    await run_diverse(query)
# Script entry point: drive the async pipeline with a single event loop.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment