Created
April 3, 2025 17:23
-
-
Save rasbt/d343874ab1a3e8512a24ab7a79c91b97 to your computer and use it in GitHub Desktop.
Reasoning Model Inference Scaling Example with DeepSeek and GitHub Models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"BASE_PROMPT = \"What is (13 × 4) + (6 × 7)\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Plain DeepSeek-V3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"=== Completion ===\n", | |
"To solve the expression \\((13 \\times 4) + (6 \\times 7)\\), follow these steps:\n", | |
"\n", | |
"1. **Calculate each multiplication separately:**\n", | |
" \\[\n", | |
" 13 \\times 4 = 52\n", | |
" \\]\n", | |
" \\[\n", | |
" 6 \\times 7 = 42\n", | |
" \\]\n", | |
"\n", | |
"2. **Add the results of the multiplications:**\n", | |
" \\[\n", | |
" 52 + 42 = 94\n", | |
" \\]\n", | |
"\n", | |
"**Final Answer:**\n", | |
"\\[\n", | |
"\\boxed{94}\n", | |
"\\]\n", | |
"\n", | |
"=== Token Usage ===\n", | |
"Prompt tokens: 18\n", | |
"Completion tokens: 107\n", | |
"Total tokens: 125\n" | |
] | |
} | |
], | |
"source": [ | |
"\"\"\"Run this model in Python\n", | |
"\n", | |
"> pip install azure-ai-inference\n", | |
"\"\"\"\n", | |
"import os\n", | |
"from azure.ai.inference import ChatCompletionsClient\n", | |
"from azure.ai.inference.models import SystemMessage\n", | |
"from azure.ai.inference.models import UserMessage\n", | |
"from azure.core.credentials import AzureKeyCredential\n", | |
"\n", | |
"# To authenticate with the model, you need to generate a personal access token (PAT) in your GitHub settings.\n", | |
"# Create your PAT by following the instructions here: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens\n", | |
"client = ChatCompletionsClient(\n", | |
" endpoint=\"https://models.inference.ai.azure.com\",\n", | |
" credential=AzureKeyCredential(os.environ[\"GITHUB_TOKEN\"]),\n", | |
")\n", | |
"\n", | |
"response = client.complete(\n", | |
" messages=[\n", | |
" SystemMessage(\"\"\"\"\"\"),\n", | |
" UserMessage(BASE_PROMPT),\n", | |
" ],\n", | |
" model=\"DeepSeek-V3\",\n", | |
" temperature=0.8,\n", | |
" max_tokens=2048,\n", | |
" top_p=0.1\n", | |
")\n", | |
"\n", | |
"# Print content\n", | |
"content = response.choices[0].message.content\n", | |
"print(\"=== Completion ===\")\n", | |
"print(content)\n", | |
"\n", | |
"# Try to print token usage if available\n", | |
"try:\n", | |
" usage = response.usage\n", | |
" print(\"\\n=== Token Usage ===\")\n", | |
" print(f\"Prompt tokens: {usage.prompt_tokens}\")\n", | |
" print(f\"Completion tokens: {usage.completion_tokens}\")\n", | |
" print(f\"Total tokens: {usage.total_tokens}\")\n", | |
"except AttributeError:\n", | |
" print(\"\\n[⚠️] Token usage info not available in this response.\")\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## DeepSeek-V3: Think step by step" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"=== Completion ===\n", | |
"Sure! Let's break it down step by step.\n", | |
"\n", | |
"**Step 1:** Calculate \\(13 \\times 4\\). \n", | |
"\\[\n", | |
"13 \\times 4 = 52\n", | |
"\\]\n", | |
"\n", | |
"**Step 2:** Calculate \\(6 \\times 7\\). \n", | |
"\\[\n", | |
"6 \\times 7 = 42\n", | |
"\\]\n", | |
"\n", | |
"**Step 3:** Add the results from Step 1 and Step 2. \n", | |
"\\[\n", | |
"52 + 42 = 94\n", | |
"\\]\n", | |
"\n", | |
"**Final Answer:** \n", | |
"\\[\n", | |
"\\boxed{94}\n", | |
"\\]\n", | |
"\n", | |
"=== Token Usage ===\n", | |
"Prompt tokens: 23\n", | |
"Completion tokens: 108\n", | |
"Total tokens: 131\n" | |
] | |
} | |
], | |
"source": [ | |
"response = client.complete(\n", | |
" messages=[\n", | |
" SystemMessage(\"\"\"\"\"\"),\n", | |
" UserMessage(BASE_PROMPT + \"? Think step by step.\"),\n", | |
" ],\n", | |
" model=\"DeepSeek-V3\",\n", | |
" temperature=0.8,\n", | |
" max_tokens=2048,\n", | |
" top_p=0.1\n", | |
")\n", | |
"\n", | |
"# Print content\n", | |
"content = response.choices[0].message.content\n", | |
"print(\"=== Completion ===\")\n", | |
"print(content)\n", | |
"\n", | |
"# Try to print token usage if available\n", | |
"try:\n", | |
" usage = response.usage\n", | |
" print(\"\\n=== Token Usage ===\")\n", | |
" print(f\"Prompt tokens: {usage.prompt_tokens}\")\n", | |
" print(f\"Completion tokens: {usage.completion_tokens}\")\n", | |
" print(f\"Total tokens: {usage.total_tokens}\")\n", | |
"except AttributeError:\n", | |
" print(\"\\n[⚠️] Token usage info not available in this response.\")\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## DeepSeek-V3 with Self-Consistency / Ensemble Reasoning" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"--- Completion 1 ---\n", | |
"Sure! Let's break it down step by step.\n", | |
"\n", | |
"**Step 1:** Calculate \\( 13 \\times 4 \\). \n", | |
"\\( 13 \\times 4 = 52 \\).\n", | |
"\n", | |
"**Step 2:** Calculate \\( 6 \\times 7 \\). \n", | |
"\\( 6 \\times 7 = 42 \\).\n", | |
"\n", | |
"**Step 3:** Add the results from Step 1 and Step 2. \n", | |
"\\( 52 + 42 = 94 \\).\n", | |
"\n", | |
"**Final Answer:** \n", | |
"\\( (13 \\times 4) + (6 \\times 7) = 94 \\).\n", | |
"🧾 Extracted Answer: 94\n", | |
"\n", | |
"--- Completion 2 ---\n", | |
"Sure! Let's break it down step by step.\n", | |
"\n", | |
"**Step 1:** Calculate \\( 13 \\times 4 \\). \n", | |
"\\( 13 \\times 4 = 52 \\).\n", | |
"\n", | |
"**Step 2:** Calculate \\( 6 \\times 7 \\). \n", | |
"\\( 6 \\times 7 = 42 \\).\n", | |
"\n", | |
"**Step 3:** Add the results from Step 1 and Step 2. \n", | |
"\\( 52 + 42 = 94 \\).\n", | |
"\n", | |
"**Final Answer:** \n", | |
"\\( (13 \\times 4) + (6 \\times 7) = 94 \\).\n", | |
"🧾 Extracted Answer: 94\n", | |
"\n", | |
"--- Completion 3 ---\n", | |
"Sure! Let's solve the expression step by step.\n", | |
"\n", | |
"**Given Expression:**\n", | |
"\\[\n", | |
"(13 \\times 4) + (6 \\times 7)\n", | |
"\\]\n", | |
"\n", | |
"**Step 1: Calculate \\(13 \\times 4\\)**\n", | |
"\\[\n", | |
"13 \\times 4 = 52\n", | |
"\\]\n", | |
"\n", | |
"**Step 2: Calculate \\(6 \\times 7\\)**\n", | |
"\\[\n", | |
"6 \\times 7 = 42\n", | |
"\\]\n", | |
"\n", | |
"**Step 3: Add the results from Step 1 and Step 2**\n", | |
"\\[\n", | |
"52 + 42 = 94\n", | |
"\\]\n", | |
"\n", | |
"**Final Answer:**\n", | |
"\\[\n", | |
"\\boxed{94}\n", | |
"\\]\n", | |
"🧾 Extracted Answer: 94\n", | |
"\n", | |
"--- Completion 4 ---\n", | |
"Sure! Let's break it down step by step.\n", | |
"\n", | |
"**Step 1:** Calculate \\(13 \\times 4\\). \n", | |
"\\[\n", | |
"13 \\times 4 = 52\n", | |
"\\]\n", | |
"\n", | |
"**Step 2:** Calculate \\(6 \\times 7\\). \n", | |
"\\[\n", | |
"6 \\times 7 = 42\n", | |
"\\]\n", | |
"\n", | |
"**Step 3:** Add the results from Step 1 and Step 2. \n", | |
"\\[\n", | |
"52 + 42 = 94\n", | |
"\\]\n", | |
"\n", | |
"**Final Answer:** \n", | |
"\\[\n", | |
"\\boxed{94}\n", | |
"\\]\n", | |
"🧾 Extracted Answer: 94\n", | |
"\n", | |
"--- Completion 5 ---\n", | |
"Sure! Let's break it down step by step.\n", | |
"\n", | |
"**Step 1:** Calculate \\( 13 \\times 4 \\). \n", | |
"\\[ 13 \\times 4 = 52 \\]\n", | |
"\n", | |
"**Step 2:** Calculate \\( 6 \\times 7 \\). \n", | |
"\\[ 6 \\times 7 = 42 \\]\n", | |
"\n", | |
"**Step 3:** Add the results from Step 1 and Step 2. \n", | |
"\\[ 52 + 42 = 94 \\]\n", | |
"\n", | |
"**Final Answer:** \n", | |
"\\[ (13 \\times 4) + (6 \\times 7) = 94 \\]\n", | |
"🧾 Extracted Answer: 94\n", | |
"\n", | |
"✅ Final Answer (Self-Consistency Vote): 94\n", | |
"Votes: Counter({'94': 5})\n" | |
] | |
} | |
], | |
"source": [ | |
"from azure.ai.inference.models import SystemMessage, UserMessage\n", | |
"from collections import Counter\n", | |
"import os\n", | |
"from azure.ai.inference import ChatCompletionsClient\n", | |
"from azure.core.credentials import AzureKeyCredential\n", | |
"\n", | |
"NUM_TRIALS = 5\n", | |
"question = \"What is (13 × 4) + (6 × 7)? Think step by step.\"\n", | |
"reasoning_model = \"DeepSeek-V3\"\n", | |
"extractor_model = \"Meta-Llama-3.1-8B-Instruct\" # You can change this to another lightweight model if needed\n", | |
"\n", | |
"client = ChatCompletionsClient(\n", | |
" endpoint=\"https://models.inference.ai.azure.com\",\n", | |
" credential=AzureKeyCredential(os.environ[\"GITHUB_TOKEN\"]),\n", | |
")\n", | |
"\n", | |
"answers = []\n", | |
"\n", | |
"for i in range(NUM_TRIALS):\n", | |
" # Step 1: Generate reasoning from DeepSeek\n", | |
" reasoning_response = client.complete(\n", | |
" messages=[\n", | |
" SystemMessage(\"You are a helpful math tutor who shows all work.\"),\n", | |
" UserMessage(question)\n", | |
" ],\n", | |
" model=reasoning_model,\n", | |
" temperature=1.6,\n", | |
" max_tokens=512,\n", | |
" top_p=0.1,\n", | |
" )\n", | |
" reasoning = reasoning_response.choices[0].message.content.strip()\n", | |
" print(f\"\\n--- Completion {i+1} ---\")\n", | |
" print(reasoning)\n", | |
"\n", | |
" # Step 2: Extract final numeric answer using a second LLM\n", | |
" extractor_prompt = f\"\"\"Extract the final numeric answer from this math explanation. Respond with just the number.\n", | |
"\n", | |
"Explanation:\n", | |
"\\\"\\\"\\\"\n", | |
"{reasoning}\n", | |
"\\\"\\\"\\\"\n", | |
"\"\"\"\n", | |
"\n", | |
" extract_response = client.complete(\n", | |
" messages=[\n", | |
" SystemMessage(\"You are a precise assistant that extracts numbers from explanations.\"),\n", | |
" UserMessage(extractor_prompt)\n", | |
" ],\n", | |
" model=extractor_model,\n", | |
" temperature=0.0,\n", | |
" max_tokens=10\n", | |
" )\n", | |
"\n", | |
" extracted = extract_response.choices[0].message.content.strip()\n", | |
" print(f\"🧾 Extracted Answer: {extracted}\")\n", | |
" answers.append(extracted)\n", | |
"\n", | |
"# Step 3: Majority vote\n", | |
"counter = Counter(answers)\n", | |
"if counter:\n", | |
" most_common, count = counter.most_common(1)[0]\n", | |
" print(\"\\n✅ Final Answer (Self-Consistency Vote):\", most_common)\n", | |
" print(\"Votes:\", counter)\n", | |
"else:\n", | |
" print(\"❌ No answers extracted.\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## DeepSeek-R1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"=== Completion ===\n", | |
"<think>\n", | |
"Okay, let's see. I need to calculate (13 × 4) + (6 × 7). Hmm, right. So, first, I should probably handle the multiplication parts before adding them together. That makes sense because of the order of operations—parentheses first, then multiplication and division, and then addition and subtraction. Wait, actually, in this case, the parentheses are just grouping the multiplication operations, so I can do each multiplication separately and then add the results. \n", | |
"\n", | |
"Alright, let's start with the first part: 13 multiplied by 4. Let me think. 13 times 4. Breaking it down, 10 times 4 is 40, and 3 times 4 is 12. Then adding those together, 40 + 12 equals 52. So, 13 × 4 is 52. Got that part down.\n", | |
"\n", | |
"Now the second part: 6 multiplied by 7. That's straightforward. 6 times 7 is 42. I remember that from the multiplication tables. So, 6 × 7 is 42.\n", | |
"\n", | |
"Now, I need to add those two results together. So, 52 plus 42. Let me add the tens first. 50 plus 40 is 90, and then 2 plus 2 is 4. So, 90 + 4 is 94. Therefore, 52 + 42 equals 94. \n", | |
"\n", | |
"Wait, let me double-check to make sure I didn't make a mistake. Sometimes when I do mental math, I might mix up numbers. So, verifying each step again. \n", | |
"\n", | |
"First multiplication: 13 × 4. 10 × 4 is 40, 3 × 4 is 12. 40 + 12 is 52. Yep, that's correct. \n", | |
"\n", | |
"Second multiplication: 6 × 7. 6 times 5 is 30, plus 6 times 2 is 12. Wait, no, that's breaking it down differently. Actually, 6 × 7 is just 42. There's no need to break it down further. 6 times 7 is definitely 42. \n", | |
"\n", | |
"Adding 52 and 42. Let's do it another way to confirm. 52 + 40 is 92, and then add the remaining 2, which gives 94. Yep, that's the same as before. So, 52 + 42 is indeed 94. \n", | |
"\n", | |
"Hmm, seems like everything checks out. I don't think I made any errors here. Both multiplications are correct, and the addition is straightforward. So, the final answer should be 94. \n", | |
"\n", | |
"Just to be absolutely sure, maybe I can compute the entire expression step by step again. \n", | |
"\n", | |
"Starting over:\n", | |
"\n", | |
"13 × 4: 13 + 13 + 13 + 13. Let's add them two at a time. 13 + 13 is 26, and another 13 + 13 is 26. Then 26 + 26 is 52. Yep, same result. \n", | |
"\n", | |
"6 × 7: 6 + 6 + 6 + 6 + 6 + 6 + 6. That's seven 6s. 6 + 6 is 12, plus another 6 is 18, then 24, 30, 36, 42. Correct. \n", | |
"\n", | |
"Adding 52 and 42. Let's do 50 + 40 = 90, and 2 + 2 = 4. 90 + 4 = 94. Yep, same answer. \n", | |
"\n", | |
"Alternatively, using column addition:\n", | |
"\n", | |
" 52\n", | |
"+42\n", | |
"----\n", | |
" 94\n", | |
"\n", | |
"Yes, adding the units place: 2 + 2 = 4. Tens place: 5 + 4 = 9. So, 94. \n", | |
"\n", | |
"Alright, I think that's solid. No mistakes detected. The answer is 94.\n", | |
"</think>\n", | |
"\n", | |
"To solve \\((13 \\times 4) + (6 \\times 7)\\):\n", | |
"\n", | |
"1. **Calculate each multiplication separately:**\n", | |
" - \\(13 \\times 4 = 52\\)\n", | |
" - \\(6 \\times 7 = 42\\)\n", | |
"\n", | |
"2. **Add the results:**\n", | |
" - \\(52 + 42 = 94\\)\n", | |
"\n", | |
"**Answer:** \\(\\boxed{94}\\)\n", | |
"\n", | |
"=== Token Usage ===\n", | |
"Prompt tokens: 18\n", | |
"Completion tokens: 900\n", | |
"Total tokens: 918\n" | |
] | |
} | |
], | |
"source": [ | |
"response = client.complete(\n", | |
" messages=[\n", | |
" SystemMessage(\"\"\"\"\"\"),\n", | |
" UserMessage(BASE_PROMPT),\n", | |
" ],\n", | |
" model=\"DeepSeek-R1\",\n", | |
" temperature=0.8,\n", | |
" max_tokens=2048,\n", | |
" top_p=0.1\n", | |
")\n", | |
"\n", | |
"# Print content\n", | |
"content = response.choices[0].message.content\n", | |
"print(\"=== Completion ===\")\n", | |
"print(content)\n", | |
"\n", | |
"# Try to print token usage if available\n", | |
"try:\n", | |
" usage = response.usage\n", | |
" print(\"\\n=== Token Usage ===\")\n", | |
" print(f\"Prompt tokens: {usage.prompt_tokens}\")\n", | |
" print(f\"Completion tokens: {usage.completion_tokens}\")\n", | |
" print(f\"Total tokens: {usage.total_tokens}\")\n", | |
"except AttributeError:\n", | |
" print(\"\\n[⚠️] Token usage info not available in this response.\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "3.11.9", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment