Skip to content

Instantly share code, notes, and snippets.

@jezell
Created May 6, 2026 21:34
Show Gist options
  • Select an option

  • Save jezell/865d0a54d661c0dbeb9c16df2dc9a276 to your computer and use it in GitHub Desktop.

Select an option

Save jezell/865d0a54d661c0dbeb9c16df2dc9a276 to your computer and use it in GitHub Desktop.
openai incorrect usage totals with context management
#!/usr/bin/env python3
"""Repro: Responses usage is zero when context_management compaction is present.

Usage:
    OPENAI_API_KEY=... python zero_usage_context_management.py

Optional env vars (defaults shown):
    OPENAI_MODEL=gpt-5.2
    OPENAI_COMPACT_THRESHOLD=200000
    OPENAI_MAX_OUTPUT_TOKENS=1024
"""
from __future__ import annotations
import asyncio
import json
import os
from typing import Any
from openai import AsyncOpenAI
# Fixed prompt sent as the ``input`` of every request this script makes, so
# the compared calls differ only in their other request parameters.
PROMPT = (
    "Write a story in about 500 words. "
    "Do not use tools. "
    "Return only the story text, with no intro or outro."
)
def response_text(response: Any) -> str:
    """Return the text carried by a response object.

    The ``output_text`` attribute is used directly when it is a string.
    Otherwise the text is rebuilt by concatenating, in order, every string
    ``text`` attribute found on the content parts of ``output`` items whose
    ``type`` is ``"message"``.

    Args:
        response: Any object. Missing attributes, and attributes that are
            ``None``, are treated as empty instead of raising.

    Returns:
        The collected text, or ``""`` when nothing textual is found.
    """
    direct = getattr(response, "output_text", None)
    if isinstance(direct, str):
        return direct

    pieces: list[str] = []
    # ``or ()`` also covers attributes that exist but are explicitly None.
    for item in getattr(response, "output", None) or ():
        if getattr(item, "type", None) != "message":
            continue  # non-message items are skipped entirely
        for part in getattr(item, "content", None) or ():
            part_text = getattr(part, "text", None)
            if isinstance(part_text, str):
                pieces.append(part_text)
    return "".join(pieces)
def usage_dict(response: Any) -> dict[str, Any] | None:
    """Return ``response.usage`` as a plain dict, or ``None`` if unavailable.

    Conversion is attempted in this order:

    1. a callable ``to_dict`` attribute, called with no arguments;
    2. a callable ``model_dump`` attribute, called with ``mode="json"``;
    3. a plain ``dict``, which is shallow-copied.

    Args:
        response: Any object; a missing or ``None`` ``usage`` attribute
            yields ``None``.

    Returns:
        The usage mapping, or ``None`` when ``usage`` is absent or is of a
        kind none of the conversions above apply to.
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return None

    to_dict = getattr(usage, "to_dict", None)
    if callable(to_dict):
        return to_dict()

    model_dump = getattr(usage, "model_dump", None)
    if callable(model_dump):
        return model_dump(mode="json")

    if isinstance(usage, dict):
        # Copy so callers can mutate the result without altering the
        # usage object still attached to the response.
        return dict(usage)
    return None
async def create_response(
    *,
    client: AsyncOpenAI,
    model: str,
    max_output_tokens: int,
    context_management: list[dict[str, Any]] | None,
) -> dict[str, Any]:
    """Send ``PROMPT`` via ``client.responses.create`` and summarize the reply.

    ``context_management`` is forwarded only when it is not ``None``;
    otherwise the key is left out of the request altogether.

    Returns:
        A dict with the response ``id``, ``output_chars`` (length of the
        text extracted by ``response_text``) and ``usage`` (the result of
        ``usage_dict``).
    """
    request: dict[str, Any] = dict(
        model=model,
        input=PROMPT,
        max_output_tokens=max_output_tokens,
    )
    if context_management is not None:
        request["context_management"] = context_management

    reply = await client.responses.create(**request)
    char_count = len(response_text(reply))
    return {
        "id": reply.id,
        "output_chars": char_count,
        "usage": usage_dict(reply),
    }
async def main() -> None:
    """Issue the same request without and with compaction; print both usages.

    Configuration comes from the environment: ``OPENAI_MODEL``,
    ``OPENAI_COMPACT_THRESHOLD`` and ``OPENAI_MAX_OUTPUT_TOKENS`` fall back
    to defaults, while ``OPENAI_API_KEY`` is required. The two calls are
    made sequentially and their summaries are printed to stdout as one
    JSON document with sorted keys.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set.
        ValueError: If a numeric environment variable is not an integer.
    """
    model = os.getenv("OPENAI_MODEL", "gpt-5.2")
    threshold = int(os.getenv("OPENAI_COMPACT_THRESHOLD", "200000"))
    max_output_tokens = int(os.getenv("OPENAI_MAX_OUTPUT_TOKENS", "1024"))

    # Use the client as an async context manager so its HTTP resources are
    # closed before the event loop shuts down, even if a request raises.
    async with AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]) as client:
        without_context_management = await create_response(
            client=client,
            model=model,
            max_output_tokens=max_output_tokens,
            context_management=None,
        )
        with_context_management = await create_response(
            client=client,
            model=model,
            max_output_tokens=max_output_tokens,
            context_management=[
                {
                    "type": "compaction",
                    "compact_threshold": threshold,
                }
            ],
        )

    print(
        json.dumps(
            {
                "model": model,
                "max_output_tokens": max_output_tokens,
                "compact_threshold": threshold,
                "without_context_management": without_context_management,
                "with_context_management": with_context_management,
            },
            indent=2,
            sort_keys=True,
        )
    )


if __name__ == "__main__":
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment