Skip to content

Instantly share code, notes, and snippets.

@cbonesana
Created March 24, 2025 10:07
Show Gist options
  • Save cbonesana/f727c76763fe62222a9c44b3a2faa53b to your computer and use it in GitHub Desktop.
Save cbonesana/f727c76763fe62222a9c44b3a2faa53b to your computer and use it in GitHub Desktop.
Semantic entropy for LLM hallucination detection.
# %%
from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion
from dotenv import load_dotenv
from tqdm.asyncio import tqdm as atqdm
import asyncio
import os
import nest_asyncio
import yaml
nest_asyncio.apply()
load_dotenv()
# %%
async def query(
    client: AsyncOpenAI,
    prompt: str,
    model: str = "gpt-4o-mini",
    repeat: int = 10,
) -> list[str]:
    """Send the same single-turn prompt ``repeat`` times and collect the answers.

    Every request is scheduled as its own task up front, so the requests are
    all in flight before the first one is awaited.

    Args:
        client: Async OpenAI client used to issue the chat completions.
        prompt: Text sent as the only (user) message of each request.
        model: Name of the chat model to query.
        repeat: Number of independent completions to request.

    Returns:
        One string per request: the message content of the first choice,
        or ``""`` when that content is empty/``None``.
    """
    # A fresh messages list is built for every request, one task per request.
    pending = [
        asyncio.create_task(
            client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
            )
        )
        for _ in range(repeat)
    ]

    # tqdm's asyncio-aware gather awaits every task.
    completions: list[ChatCompletion] = await atqdm.gather(*pending)

    return [
        completion.choices[0].message.content or ""
        for completion in completions
    ]
async def assess(client: AsyncOpenAI, question: str, responses: list[str], model: str = "gpt-4o-mini") -> str:
    """Ask the model to judge whether a set of answers is consistent.

    The model is cast as a teacher reviewing its students' answers to
    ``question`` and is asked to reply with a fenced YAML block holding
    ``thinking``, ``reason`` and ``decision`` (POSITIVE / NEGATIVE).

    Args:
        client: Async OpenAI client used to issue the chat completion.
        question: The question the answers respond to.
        responses: The collected answers; they are joined with newlines.
        model: Name of the chat model used as the judge.

    Returns:
        The raw message content of the first choice (``""`` when it is
        empty/``None``). The reply is not parsed or validated here.
    """
    # The template lines stay flush-left so that no source indentation leaks
    # into the prompt. The literal starts directly with the text: a stray
    # leading double quote used to be sent as the first prompt character.
    prompt = """You are a teacher that applied this question to your students and this are their answers.
Does these answers imply that the group understood the lesson?
### YOUR QUESTION:
{question}
### ANSWERS:
{answers}
Return your response in this format:
```yaml
thinking: |
<your step-by-step reasoning process>
reason: <why you made this decision>
decision: <POSITIVE if your students understood the lesson, otherwise NEGATIVE>
```
""".format(
        question=question,
        answers="\n".join(responses),
    )

    response = await client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
    )
    return response.choices[0].message.content or ""
# %%
# API key taken from the environment (populated by load_dotenv() above);
# None when the variable is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
client = AsyncOpenAI(api_key=OPENAI_API_KEY)

# Event loop used by the cells below to drive the coroutines synchronously.
loop = asyncio.new_event_loop()
# %%
# Only the last assignment takes effect; the first question is kept as an
# alternative to swap in.
question = "What's the capital of france?"
question = "Let's assume that you are capable of computing all digits of Pi, what is the sixth from last digit?"

# Sample the same question many times.
responses = loop.run_until_complete(
    query(
        client,
        question,
        repeat=100,
    )
)

# Let the model judge whether the sampled answers agree with each other.
response = loop.run_until_complete(
    assess(
        client,
        question,
        responses,
    )
)

# Take the text after the first ```yaml fence up to the next fence. When the
# model omitted the yaml fence, [-1] yields the whole reply instead of raising
# an IndexError, so an unfenced YAML answer is still parsed.
yaml_str = response.split("```yaml", 1)[-1].split("```", 1)[0].strip()

# Parse the YAML text into Python data; the reporting cell expects a mapping.
response = yaml.safe_load(yaml_str)
if not isinstance(response, dict):
    raise ValueError(
        f"Could not parse a YAML mapping from the assessment reply: {yaml_str!r}"
    )
# %%
# Report: the question, every sampled answer, then the parsed verdict.
print("QUESTION:", question, "\n")
for answer in responses:
    print("-", answer)
print()

print("DECISION:", response["decision"])
# The remaining fields are printed as a heading line followed by the body.
for heading, field in (("REASON:", "reason"), ("THOUGHT PROCESS:", "thinking")):
    print(heading)
    print(response[field])
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment