This is a slightly modified version of the DiVeRSe (Diverse Verifier on Reasoning Steps) technique, implemented with LLMs via Mirascope. DiVeRSe is a prompt engineering method that enhances an LLM's reasoning capabilities by generating multiple reasoning chains from variations of the original prompt, then scoring each chain's answer and aggregating the scores. Each step of the script is printed to the screen.
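The script declares its dependencies with inline script metadata (PEP 723), so it can be run directly with uv. Assuming it is saved as diverse.py (the filename here is arbitrary) and a GROQ_API_KEY environment variable is set, uv run diverse.py will resolve the dependencies and execute it.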
# from https://mirascope.com/docs/mirascope/guides/prompt-engineering/chaining-based/diverse
# adapted for Groq by Claude (web version)
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "mirascope[groq]",
#     "pydantic>=2.0",
# ]
# ///
import asyncio

from mirascope.core import groq, prompt_template
from pydantic import BaseModel, Field


class PromptVariations(BaseModel):
    variations: list[str] = Field(..., description="Variations of the original prompt")


@groq.call(model="llama-3.3-70b-versatile", response_model=PromptVariations)
@prompt_template(
    """
    Return {num_prompts} alternate variations of the prompt which retain the
    full meaning but use different phrasing.
    Prompt: {prompt}
    """
)
def get_prompt_variations(prompt: str, num_prompts: int): ...


@groq.call(model="llama-3.3-70b-versatile")
@prompt_template(
    """
    Answer the following question, going step by step:
    {query}
    """
)
async def zero_shot_cot(query: str): ...


class ResponseDetails(BaseModel):
    solution_number: int = Field(
        ..., description="The actual number given as the answer in a solution."
    )
    correctness_probability: float = Field(
        ...,
        ge=0,
        le=1,
        description="An estimated probability that the given solution is correct, from 0.0 to 1.0",
    )


@groq.call(model="llama-3.3-70b-versatile", response_model=ResponseDetails)
@prompt_template(
    """
    Here is a query and a response which attempts to answer the query.
    Prompt: {query}
    Response: {response}
    Extract the raw numerical value of the answer given by the response, and also
    give an estimate between 0.0 and 1.0 of the probability that this solution
    is correct.
    """
)
async def evaluate_response(query: str, response: str): ...


async def diverse(query: str, num_variations: int) -> int:
    print("=" * 80)
    print("STEP 1: Generating prompt variations")
    print("=" * 80)

    # Gather the variations of the prompt (the original query counts as one)
    alternate_variations = get_prompt_variations(query, num_variations - 1)
    all_variations = alternate_variations.variations + [query]
    print(f"\nOriginal query:\n{query}\n")
    print(f"Generated {len(alternate_variations.variations)} variations:\n")
    for i, variation in enumerate(alternate_variations.variations, 1):
        print(f"{i}. {variation}\n")

    print("=" * 80)
    print("STEP 2: Generating reasoning chains with Chain-of-Thought")
    print("=" * 80)

    # Generate a unique reasoning chain for each prompt variation with CoT,
    # running all calls concurrently
    cot_tasks = [zero_shot_cot(prompt) for prompt in all_variations]
    cot_responses = [response.content for response in await asyncio.gather(*cot_tasks)]
    for i, (variation, cot_response) in enumerate(zip(all_variations, cot_responses), 1):
        print(f"\n--- Reasoning Chain {i} ---")
        print(f"Prompt variation: {variation[:100]}...")
        print(f"Response:\n{cot_response}\n")

    print("=" * 80)
    print("STEP 3: Evaluating each reasoning chain")
    print("=" * 80)

    # Evaluate each reasoning chain against the original query
    eval_tasks = [
        evaluate_response(query, cot_response) for cot_response in cot_responses
    ]
    eval_responses = await asyncio.gather(*eval_tasks)

    # Sum the correctness probabilities per extracted answer (weighted voting)
    response_scores: dict[int, float] = {}
    for i, eval_response in enumerate(eval_responses, 1):
        print(f"\n--- Evaluation {i} ---")
        print(f"Solution number: {eval_response.solution_number}")
        print(f"Correctness probability: {eval_response.correctness_probability:.2f}")
        if eval_response.solution_number not in response_scores:
            response_scores[eval_response.solution_number] = 0.0
        response_scores[eval_response.solution_number] += (
            eval_response.correctness_probability
        )

    print("\n" + "=" * 80)
    print("STEP 4: Aggregating scores and selecting best answer")
    print("=" * 80)
    print("\nAggregated scores:")
    for solution, score in sorted(response_scores.items(), key=lambda x: x[1], reverse=True):
        print(f"Solution {solution}: Total score = {score:.2f}")

    best_response = max(response_scores.keys(), key=lambda k: response_scores[k])
    print(f"\n{'=' * 80}")
    print(f"FINAL ANSWER: {best_response}")
    print(f"{'=' * 80}\n")
    return best_response


async def run_diverse(prompt: str, num_variations: int = 3) -> int:
    return await diverse(prompt, num_variations)


async def main():
    query = """
    A committee of 3 people must be formed from a pool of 6 people, but Amy and Bob do not
    get along and should not be on the committee at the same time. How many viable
    combinations are there?
    """
    await run_diverse(query)


if __name__ == "__main__":
    asyncio.run(main())
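For reference, the committee question has a closed-form answer to check the script's output against: there are C(6,3) = 20 ways to pick any 3 of 6 people, of which C(4,1) = 4 contain both Amy and Bob (their two seats fixed, one of the remaining 4 people fills the third), leaving 16 viable committees. A minimal sanity check, independent of the LLM:

from math import comb

total = comb(6, 3)        # 20 committees of 3 from 6 people
with_both = comb(4, 1)    # committees containing both Amy and Bob
print(total - with_both)  # 16 viable committees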