Created
May 6, 2026 21:34
-
-
Save jezell/865d0a54d661c0dbeb9c16df2dc9a276 to your computer and use it in GitHub Desktop.
OpenAI: incorrect usage totals with context management
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """Repro: Responses usage is zero when context_management compaction is present. | |
| Usage: | |
| OPENAI_API_KEY=... python zero_usage_context_management.py | |
| Optional env vars: | |
| OPENAI_MODEL=gpt-5.2 | |
| OPENAI_COMPACT_THRESHOLD=200000 | |
| OPENAI_MAX_OUTPUT_TOKENS=1024 | |
| """ | |
| from __future__ import annotations | |
| import asyncio | |
| import json | |
| import os | |
| from typing import Any | |
| from openai import AsyncOpenAI | |
# Fixed prompt used as the request input, so runs differ only in their
# context_management setting.
PROMPT = (
    "Write a story in about 500 words. "
    "Do not use tools. "
    "Return only the story text, with no intro or outro."
)
def response_text(response: Any) -> str:
    """Return the text carried by *response*.

    If ``response.output_text`` is a string it is returned as-is. Otherwise
    the string ``text`` attributes of every content part of every
    ``"message"`` item in ``response.output`` are concatenated in order.

    Args:
        response: Any object; missing or ``None`` attributes are tolerated.

    Returns:
        The extracted text, or ``""`` when nothing usable is found.
    """
    direct = getattr(response, "output_text", None)
    if isinstance(direct, str):
        return direct

    pieces: list[str] = []
    for entry in getattr(response, "output", []) or []:
        # Only "message" items are read; every other item type is skipped.
        if getattr(entry, "type", None) == "message":
            parts = getattr(entry, "content", []) or []
            pieces.extend(
                part_text
                for part_text in (getattr(part, "text", None) for part in parts)
                if isinstance(part_text, str)
            )
    return "".join(pieces)
def usage_dict(response: Any) -> dict[str, Any] | None:
    """Convert ``response.usage`` into a plain dict.

    Conversion strategies are tried in this order:

    1. ``usage.to_dict()`` if that attribute is callable,
    2. ``usage.model_dump(mode="json")`` if that attribute is callable,
    3. ``usage`` itself when it is already a ``dict``.

    Args:
        response: Any object; a missing ``usage`` attribute is treated as
            ``None``.

    Returns:
        The usage mapping, or ``None`` when usage is absent or cannot be
        converted by any of the strategies above.
    """
    usage = getattr(response, "usage", None)
    if usage is None:
        return None

    # (method name, keyword arguments) pairs, in priority order.
    converters = (
        ("to_dict", {}),
        ("model_dump", {"mode": "json"}),
    )
    for method_name, call_kwargs in converters:
        method = getattr(usage, method_name, None)
        if callable(method):
            return method(**call_kwargs)

    return usage if isinstance(usage, dict) else None
async def create_response(
    *,
    client: AsyncOpenAI,
    model: str,
    max_output_tokens: int,
    context_management: list[dict[str, Any]] | None,
) -> dict[str, Any]:
    """Send ``PROMPT`` via ``client.responses.create`` and summarize the reply.

    Args:
        client: Client whose ``responses.create`` coroutine is awaited.
        model: Value passed as the ``model`` request argument.
        max_output_tokens: Value passed as ``max_output_tokens``.
        context_management: Passed as ``context_management`` when not
            ``None``; when ``None`` the argument is omitted from the request
            entirely.

    Returns:
        A dict with three keys: ``"id"`` (the response id),
        ``"output_chars"`` (length of the text from ``response_text``) and
        ``"usage"`` (the result of ``usage_dict``, possibly ``None``).
    """
    request: dict[str, Any] = dict(
        model=model,
        input=PROMPT,
        max_output_tokens=max_output_tokens,
    )
    # Leave the key out rather than sending an explicit None.
    if context_management is not None:
        request.update(context_management=context_management)

    response = await client.responses.create(**request)
    story = response_text(response)

    summary: dict[str, Any] = {"id": response.id}
    summary["output_chars"] = len(story)
    summary["usage"] = usage_dict(response)
    return summary
async def main() -> None:
    """Run the prompt with and without compaction and print both results.

    Configuration is read from the environment:

    * ``OPENAI_API_KEY`` (required)
    * ``OPENAI_MODEL`` (default ``"gpt-5.2"``)
    * ``OPENAI_COMPACT_THRESHOLD`` (default ``200000``)
    * ``OPENAI_MAX_OUTPUT_TOKENS`` (default ``1024``)

    Two requests are made in sequence: first without ``context_management``,
    then with a single ``"compaction"`` entry using the configured
    threshold. The settings and both summaries are printed to stdout as one
    indented JSON document with sorted keys.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set.
        ValueError: If a numeric environment variable is not a valid integer.
    """
    model = os.getenv("OPENAI_MODEL", "gpt-5.2")
    threshold = int(os.getenv("OPENAI_COMPACT_THRESHOLD", "200000"))
    max_output_tokens = int(os.getenv("OPENAI_MAX_OUTPUT_TOKENS", "1024"))

    # Use the client as an async context manager so it is closed
    # deterministically, even when a request raises, instead of leaving
    # cleanup to garbage collection at event-loop shutdown.
    async with AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]) as client:
        without_context_management = await create_response(
            client=client,
            model=model,
            max_output_tokens=max_output_tokens,
            context_management=None,
        )
        with_context_management = await create_response(
            client=client,
            model=model,
            max_output_tokens=max_output_tokens,
            context_management=[
                {
                    "type": "compaction",
                    "compact_threshold": threshold,
                }
            ],
        )

    print(
        json.dumps(
            {
                "model": model,
                "max_output_tokens": max_output_tokens,
                "compact_threshold": threshold,
                "without_context_management": without_context_management,
                "with_context_management": with_context_management,
            },
            indent=2,
            sort_keys=True,
        )
    )


if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment