jezell · May 6, 2026 21:34
diff --git a/zero_usage_context_management.py b/zero_usage_context_management.py
 #!/usr/bin/env python3
 """Repro: Responses usage is zero when context_management compaction is present.

 Usage:
  OPENAI_API_KEY=... python zero_usage_context_management.py

 Optional env vars:
  OPENAI_MODEL=gpt-5.2
  OPENAI_COMPACT_THRESHOLD=200000
  OPENAI_MAX_OUTPUT_TOKENS=1024
 """

 from __future__ import annotations

 import asyncio
 import json
 import os
 from typing import Any

 from openai import AsyncOpenAI


 # Fixed prompt used as the ``input`` of every request made by this script.
 PROMPT = (
    "Write a story in about 500 words. "
    "Do not use tools. "
    "Return only the story text, with no intro or outro."
 )


 def response_text(response: Any) -> str:
    """Return the text carried by a response object.

    A string ``output_text`` attribute is returned as-is. Otherwise the
    result is the concatenation of every string ``text`` attribute found on
    the ``content`` entries of ``output`` items whose ``type`` is
    ``"message"``. Missing or ``None`` attributes are treated as empty, so
    an object with none of these attributes yields ``""``.
    """
    direct = getattr(response, "output_text", None)
    if isinstance(direct, str):
        return direct

    # Fallback: walk the structured output, looking only at message items.
    candidates = [
        getattr(part, "text", None)
        for entry in getattr(response, "output", []) or []
        if getattr(entry, "type", None) == "message"
        for part in getattr(entry, "content", []) or []
    ]
    # Parts without a string ``text`` contribute nothing.
    return "".join(piece for piece in candidates if isinstance(piece, str))


 def usage_dict(response: Any) -> dict[str, Any] | None:
    """Convert ``response.usage`` into a plain dictionary when possible.

    Strategies are tried in this order: a callable ``to_dict`` attribute, a
    callable ``model_dump`` attribute (invoked with ``mode="json"``), and
    finally returning ``usage`` itself when it already is a ``dict``.
    Returns ``None`` when ``usage`` is missing or ``None``, or when none of
    the strategies applies.
    """
    payload = getattr(response, "usage", None)
    if payload is None:
        return None

    # Exporter methods take precedence over the plain-dict pass-through.
    exporters = (("to_dict", {}), ("model_dump", {"mode": "json"}))
    for method_name, call_kwargs in exporters:
        exporter = getattr(payload, method_name, None)
        if callable(exporter):
            return exporter(**call_kwargs)

    return payload if isinstance(payload, dict) else None


 async def create_response(
    *,
    client: AsyncOpenAI,
    model: str,
    max_output_tokens: int,
    context_management: list[dict[str, Any]] | None,
 ) -> dict[str, Any]:
    """Send ``PROMPT`` through ``client.responses.create`` and summarize the reply.

    Args:
        client: Object whose ``responses.create`` coroutine performs the call.
        model: Value passed as the ``model`` request argument.
        max_output_tokens: Value passed as ``max_output_tokens``.
        context_management: Passed as ``context_management`` when not
            ``None``; when ``None`` the argument is left out of the request
            entirely.

    Returns:
        A dict with the response ``id``, ``output_chars`` (length of the
        text extracted by ``response_text``) and ``usage`` (the result of
        ``usage_dict``).
    """
    request: dict[str, Any] = dict(
        model=model,
        input=PROMPT,
        max_output_tokens=max_output_tokens,
    )
    # Omit the key rather than sending an explicit null.
    if context_management is not None:
        request["context_management"] = context_management

    reply = await client.responses.create(**request)
    body_text = response_text(reply)
    return dict(
        id=reply.id,
        output_chars=len(body_text),
        usage=usage_dict(reply),
    )


 async def main() -> None:
    """Run the prompt with and without compaction and print both summaries.

    Configuration is read from the environment: ``OPENAI_API_KEY``
    (required), ``OPENAI_MODEL`` (default ``gpt-5.2``),
    ``OPENAI_COMPACT_THRESHOLD`` (default ``200000``) and
    ``OPENAI_MAX_OUTPUT_TOKENS`` (default ``1024``). The two summaries
    returned by ``create_response`` are printed to stdout as one indented
    JSON document with sorted keys.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set.
        ValueError: If a numeric environment variable is not an integer.
    """
    model = os.getenv("OPENAI_MODEL", "gpt-5.2")
    threshold = int(os.getenv("OPENAI_COMPACT_THRESHOLD", "200000"))
    max_output_tokens = int(os.getenv("OPENAI_MAX_OUTPUT_TOKENS", "1024"))

    # Use the client as an async context manager so it is closed on exit,
    # while the event loop started by asyncio.run() is still alive.
    async with AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]) as client:
        # Baseline request: the context_management argument is omitted.
        without_context_management = await create_response(
            client=client,
            model=model,
            max_output_tokens=max_output_tokens,
            context_management=None,
        )
        # Same request, but with a single compaction entry.
        with_context_management = await create_response(
            client=client,
            model=model,
            max_output_tokens=max_output_tokens,
            context_management=[
                {
                    "type": "compaction",
                    "compact_threshold": threshold,
                }
            ],
        )

    print(
        json.dumps(
            {
                "model": model,
                "max_output_tokens": max_output_tokens,
                "compact_threshold": threshold,
                "without_context_management": without_context_management,
                "with_context_management": with_context_management,
            },
            indent=2,
            sort_keys=True,
        )
    )


 # Script entry point: when executed directly, run main() to completion
 # via asyncio.run().
 if __name__ == "__main__":
    asyncio.run(main())
	#!/usr/bin/env python3
	"""Repro: Responses usage is zero when context_management compaction is present.

	Usage:
	OPENAI_API_KEY=... python zero_usage_context_management.py

	Optional env vars:
	OPENAI_MODEL=gpt-5.2
	OPENAI_COMPACT_THRESHOLD=200000
	OPENAI_MAX_OUTPUT_TOKENS=1024
	"""

	from __future__ import annotations

	import asyncio
	import json
	import os
	from typing import Any

	from openai import AsyncOpenAI


	# Fixed prompt used as the ``input`` of every request made by this script.
	PROMPT = (
	    "Write a story in about 500 words. "
	    "Do not use tools. "
	    "Return only the story text, with no intro or outro."
	)


	def response_text(response: Any) -> str:
	    """Extract the text of a response object.

	    Returns ``response.output_text`` when it is a string. Otherwise joins
	    the string ``text`` attributes of all ``content`` entries belonging to
	    ``output`` items whose ``type`` is ``"message"``. Missing or ``None``
	    attributes are treated as empty, so the result may be ``""``.
	    """
	    text = getattr(response, "output_text", None)
	    if isinstance(text, str):
	        return text

	    # Fallback: collect text parts from message items only.
	    chunks: list[str] = []
	    for item in getattr(response, "output", []) or []:
	        if getattr(item, "type", None) != "message":
	            continue
	        for content in getattr(item, "content", []) or []:
	            content_text = getattr(content, "text", None)
	            if isinstance(content_text, str):
	                chunks.append(content_text)
	    return "".join(chunks)


	def usage_dict(response: Any) -> dict[str, Any] | None:
	    """Return ``response.usage`` as a plain dictionary, or ``None``.

	    Tries, in order: a callable ``to_dict`` attribute, a callable
	    ``model_dump`` attribute (called with ``mode="json"``), then returns
	    ``usage`` unchanged if it is already a ``dict``. ``None`` is returned
	    when ``usage`` is missing or ``None``, or when no strategy applies.
	    """
	    usage = getattr(response, "usage", None)
	    if usage is None:
	        return None
	    to_dict = getattr(usage, "to_dict", None)
	    if callable(to_dict):
	        return to_dict()
	    model_dump = getattr(usage, "model_dump", None)
	    if callable(model_dump):
	        return model_dump(mode="json")
	    if isinstance(usage, dict):
	        return usage
	    return None


	async def create_response(
	    *,
	    client: AsyncOpenAI,
	    model: str,
	    max_output_tokens: int,
	    context_management: list[dict[str, Any]] | None,
	) -> dict[str, Any]:
	    """Call ``client.responses.create`` with ``PROMPT`` and summarize the reply.

	    Args:
	        client: Object whose ``responses.create`` coroutine performs the call.
	        model: Value passed as the ``model`` request argument.
	        max_output_tokens: Value passed as ``max_output_tokens``.
	        context_management: Passed as ``context_management`` when not
	            ``None``; otherwise the argument is omitted from the request.

	    Returns:
	        A dict with the response ``id``, ``output_chars`` (length of the
	        text returned by ``response_text``) and ``usage`` (the result of
	        ``usage_dict``).
	    """
	    kwargs: dict[str, Any] = {
	        "model": model,
	        "input": PROMPT,
	        "max_output_tokens": max_output_tokens,
	    }
	    # Only send the key when a value was supplied.
	    if context_management is not None:
	        kwargs["context_management"] = context_management

	    response = await client.responses.create(**kwargs)
	    text = response_text(response)
	    return {
	        "id": response.id,
	        "output_chars": len(text),
	        "usage": usage_dict(response),
	    }


	async def main() -> None:
	    """Issue the request with and without compaction and print both summaries.

	    Reads ``OPENAI_API_KEY`` (required), ``OPENAI_MODEL`` (default
	    ``gpt-5.2``), ``OPENAI_COMPACT_THRESHOLD`` (default ``200000``) and
	    ``OPENAI_MAX_OUTPUT_TOKENS`` (default ``1024``) from the environment,
	    then prints one indented JSON document with sorted keys to stdout.

	    Raises:
	        KeyError: If ``OPENAI_API_KEY`` is not set.
	        ValueError: If a numeric environment variable is not an integer.
	    """
	    model = os.getenv("OPENAI_MODEL", "gpt-5.2")
	    threshold = int(os.getenv("OPENAI_COMPACT_THRESHOLD", "200000"))
	    max_output_tokens = int(os.getenv("OPENAI_MAX_OUTPUT_TOKENS", "1024"))

	    # The async context manager closes the client on exit, while the
	    # event loop started by asyncio.run() is still running.
	    async with AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"]) as client:
	        # Baseline: no context_management argument.
	        without_context_management = await create_response(
	            client=client,
	            model=model,
	            max_output_tokens=max_output_tokens,
	            context_management=None,
	        )
	        # Comparison: a single compaction entry.
	        with_context_management = await create_response(
	            client=client,
	            model=model,
	            max_output_tokens=max_output_tokens,
	            context_management=[
	                {
	                    "type": "compaction",
	                    "compact_threshold": threshold,
	                }
	            ],
	        )

	    print(
	        json.dumps(
	            {
	                "model": model,
	                "max_output_tokens": max_output_tokens,
	                "compact_threshold": threshold,
	                "without_context_management": without_context_management,
	                "with_context_management": with_context_management,
	            },
	            indent=2,
	            sort_keys=True,
	        )
	    )


	# Script entry point: when executed directly, run main() to completion
	# via asyncio.run().
	if __name__ == "__main__":
	    asyncio.run(main())
No results found