Skip to content

Instantly share code, notes, and snippets.

@secemp9
Created July 22, 2025 02:29
Show Gist options
  • Save secemp9/a2cd6a0fbdd65d85aa2d7003c6802772 to your computer and use it in GitHub Desktop.
OpenRouter tool-calling verification script
import requests
import json
import os
import subprocess
import time
from typing import Dict, List, Any, Optional
import uuid
import shutil
class ToolCallingTester:
    """Probe an OpenAI-compatible API (OpenRouter by default) for working tool calling.

    The check runs three stages for a chosen model:
      1. Force a call to a ``shell`` tool and validate the generated arguments.
      2. Execute the requested command locally, capturing diagnostics.
      3. Send the tool output back and confirm the model responds to it
         (the "feedback loop").
    """

    def __init__(self, api_key: str, base_url: str = "https://openrouter.ai/api/v1"):
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json',
            'HTTP-Referer': 'https://opencode.ai/',
            'X-Title': 'opencode',
        })
        self.test_results = []
        # Cleared when the feedback loop (stage 3) is observed to fail.
        self.supports_tool_feedback = True
        self.available_models = self.get_available_models()

    def get_available_models(self) -> List[str]:
        """Fetch models supporting tool calling from OpenRouter.

        Returns an empty list on any network/API failure so construction
        never raises.
        """
        try:
            # Explicit timeout: without one a stalled endpoint hangs startup forever.
            response = self.session.get(f"{self.base_url}/models", timeout=30)
            if response.status_code == 200:
                models = response.json().get('data', [])
                return [model['id'] for model in models
                        if 'tools' in model.get('supported_parameters', [])]
            print(f"⚠️ Failed to fetch models: {response.status_code} - {response.text}")
            return []
        except Exception as e:
            print(f"⚠️ Error fetching models: {str(e)}")
            return []

    def create_shell_tool(self) -> Dict[str, Any]:
        """Define a shell tool for testing, compatible with OpenRouter."""
        return {
            "type": "function",
            "function": {
                "name": "shell",
                "description": "Execute shell commands and return output",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "command": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Command to execute as array of strings (e.g., ['ls', '-la'])"
                        }
                    },
                    "required": ["command"]
                }
            }
        }

    def execute_shell_command(self, command_args: List[str], call_id: str) -> Dict[str, Any]:
        """Execute a test command with diagnostics.

        Returns a dict with ``call_id``, ``content`` (human-readable report)
        and ``success`` (True iff the process exited 0).
        """
        try:
            start_time = time.time()
            # List argv with shell=False (the default): no shell-injection surface
            # even though the command ultimately comes from a model.
            result = subprocess.run(command_args, capture_output=True, text=True, timeout=30)
            duration = time.time() - start_time
            is_success = result.returncode == 0
            output_text = result.stdout if is_success else result.stderr
            content = f"Command: {' '.join(command_args)}\n"
            content += f"Exit code: {result.returncode}\n"
            content += f"Duration: {duration:.2f}s\n"
            content += f"Working directory: {os.getcwd()}\n\n"
            content += output_text
            return {
                "call_id": call_id,
                "content": content,
                "success": is_success
            }
        except Exception as e:
            content = f"Diagnostic Report for Failed Command: {' '.join(command_args)}\n"
            content += f"Error: {str(e)}\n"
            if isinstance(e, subprocess.CalledProcessError):
                content += f"Stderr: {e.stderr}\n"
            content += "Suggestion: Verify command availability and environment setup.\n"
            # os.getlogin() raises OSError when there is no controlling terminal
            # (CI, cron, containers); fall back to the environment so the
            # diagnostics path itself cannot crash.
            try:
                user = os.getlogin()
            except OSError:
                user = os.environ.get('USER') or os.environ.get('USERNAME') or 'unknown'
            content += f"Environment Info:\nPATH: {os.getenv('PATH')}\nUser: {user}\nAPI Endpoint: {self.base_url}\n"
            if not self.supports_tool_feedback:
                content += "Possible API Issue: Feedback loop may not be supported.\n"
            return {
                "call_id": call_id,
                "content": content,
                "success": False
            }

    def send_test_request(self, messages: List[Dict], tools: List[Dict], model: str, tool_choice: Any, retries: int = 3) -> tuple[Optional[List[Dict]], Optional[str], Optional[str]]:
        """Send a test request with robust streaming parsing and non-streaming fallback.

        Returns ``(tool_calls, assistant_content, error)``; on success
        ``error`` is None, on failure the first two are None and ``error``
        carries the last observed API/transport error so callers can
        inspect it (e.g. their ``"404" in error.lower()`` checks).
        """
        last_error: Optional[str] = None
        attempt = 0
        while attempt < retries:
            attempt += 1
            for use_streaming in [True, False]:
                try:
                    payload = {
                        "model": model,
                        "messages": messages,
                        "tools": tools,
                        "tool_choice": tool_choice,
                        "stream": use_streaming
                    }
                    # Connect/read timeout so a dead provider cannot hang a whole run.
                    response = self.session.post(
                        f"{self.base_url}/chat/completions",
                        json=payload,
                        stream=use_streaming,
                        timeout=120
                    )
                    if response.status_code != 200:
                        last_error = f"API Error: {response.status_code} - {response.text}"
                        if "404" in last_error.lower() or "no endpoints found" in last_error.lower():
                            last_error += "\nPossible cause: No provider supports tool calling for this model"
                        continue  # Try next mode
                    tool_calls = []
                    assistant_content = ""
                    if use_streaming:
                        class ToolCallState:
                            """Accumulates one tool call across streamed deltas."""
                            def __init__(self):
                                self.call_id = None
                                self.name = None
                                self.arguments = ""
                                self.active = False

                        index_to_state: Dict[int, Any] = {}
                        finish_reason = None
                        for line in response.iter_lines():
                            if not line:
                                continue
                            line_str = line.decode('utf-8')
                            if not line_str.startswith('data: '):
                                continue
                            # Renamed from `data` to avoid shadowing the request payload.
                            event = line_str[6:]
                            if event == '[DONE]':
                                break
                            try:
                                chunk = json.loads(event)
                            except json.JSONDecodeError:
                                continue
                            if not chunk.get('choices'):
                                continue
                            choice = chunk['choices'][0]
                            delta = choice.get('delta', {})
                            finish_reason = choice.get('finish_reason')
                            if delta.get('content'):
                                assistant_content += delta['content']
                            for tc_delta in delta.get('tool_calls') or []:
                                index = tc_delta.get('index', len(index_to_state))
                                state = index_to_state.setdefault(index, ToolCallState())
                                state.active = True
                                if 'id' in tc_delta:
                                    state.call_id = tc_delta['id']
                                func = tc_delta.get('function', {})
                                if 'name' in func:
                                    state.name = func['name']
                                if func.get('arguments'):
                                    state.arguments += func['arguments']
                            if finish_reason:
                                break
                        # After streaming, keep only calls whose arguments parse as JSON.
                        if finish_reason == 'tool_calls':
                            for index, state in sorted(index_to_state.items()):
                                if not state.active:
                                    continue
                                try:
                                    json.loads(state.arguments)
                                except json.JSONDecodeError:
                                    continue
                                tool_calls.append({
                                    "id": state.call_id,
                                    "type": "function",
                                    "function": {
                                        "name": state.name,
                                        "arguments": state.arguments
                                    }
                                })
                    else:
                        # Non-streaming fallback.
                        resp_json = response.json()
                        if resp_json.get('choices'):
                            choice = resp_json['choices'][0]
                            message = choice['message']
                            # `content` may be explicitly null alongside tool calls;
                            # `or ""` also covers that (get's default only covers absence).
                            assistant_content = message.get('content') or ""
                            for tc in message.get('tool_calls') or []:
                                if tc.get('type') != 'function':
                                    continue
                                try:
                                    json.loads(tc['function']['arguments'])
                                    tool_calls.append(tc)
                                except json.JSONDecodeError:
                                    continue
                    if tool_calls or assistant_content:
                        return tool_calls, assistant_content, None
                    continue  # Try next mode if no output
                except Exception as e:
                    last_error = f"Request failed ({'streaming' if use_streaming else 'non-streaming'}): {str(e)}"
                    continue
            time.sleep(1)  # Retry delay
        # Surface the last concrete error (previously built but dropped).
        if last_error:
            return None, None, f"Request failed after {retries} attempts: {last_error}"
        return None, None, f"Request failed after {retries} attempts"

    def test_tool_calling(self, model: str) -> Dict[str, Any]:
        """Run a complete tool-calling test for OpenRouter.

        Returns a dict with ``tool_call_success``, ``feedback_loop_success``,
        ``diagnostics`` (list of human-readable strings) and ``model_used``.
        """
        result = {
            "tool_call_success": False,
            "feedback_loop_success": False,
            "diagnostics": [],
            "model_used": model
        }
        # Verify model compatibility before spending a request.
        if model not in self.available_models:
            result["diagnostics"].append(f"Model '{model}' not in supported tool-calling models: {self.available_models}")
            result["diagnostics"].append("Suggestion: Choose a model from the available list or check https://openrouter.ai/models?supported_parameters=tools")
            return result
        # Step 1: Test initial tool call (forced via tool_choice).
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a test assistant. Use the shell tool to run ['echo', 'TEST']. "
                    "Always provide valid JSON arguments like {'command': ['echo', 'TEST']}. "
                    "If tool calling fails, explain why and suggest alternatives."
                )
            },
            {"role": "user", "content": "Run the shell command 'echo TEST'"}
        ]
        tools = [self.create_shell_tool()]
        force_tool_choice = {"type": "function", "function": {"name": "shell"}}
        tool_calls, assistant_content, error = self.send_test_request(messages, tools, model, tool_choice=force_tool_choice)
        if error:
            result["diagnostics"].append(f"Initial tool call failed: {error}")
            if "404" in error.lower() or "no endpoints found" in error.lower():
                result["diagnostics"].append("Possible cause: No provider supports tool calling for this model")
            result["diagnostics"].append("Suggestion: Try models like anthropic/claude-3.5-sonnet:beta or mistralai/mistral-large-2411")
            return result
        if not tool_calls:
            result["diagnostics"].append("No tool calls generated. Possible causes:")
            result["diagnostics"].append("- Model may not support tool calling consistently")
            result["diagnostics"].append("- Provider routing may have selected a non-tool-calling endpoint")
            result["diagnostics"].append("- System prompt may need more explicit tool-calling instructions")
            result["diagnostics"].append(f"Assistant content (if any): {assistant_content}")
            result["diagnostics"].append("Suggestion: Try models like anthropic/claude-3.5-sonnet:beta or adjust prompt")
            return result
        result["tool_call_success"] = True
        result["diagnostics"].append(f"Tool calls generated: {tool_calls}")
        # Step 2: Execute the tool calls locally.
        tool_results = []
        for tool_call in tool_calls:
            if tool_call["function"]["name"] == "shell":
                try:
                    # Empty argument strings fall back to the expected test command.
                    args = json.loads(tool_call["function"]["arguments"]) if tool_call["function"]["arguments"].strip() else {"command": ["echo", "TEST"]}
                    command = args.get("command", ["echo", "TEST"])
                    tool_result = self.execute_shell_command(command, tool_call["id"])
                    tool_results.append(tool_result)
                    result["diagnostics"].append(f"Tool executed: {tool_result['content']}")
                except json.JSONDecodeError as e:
                    result["diagnostics"].append(f"Failed to parse tool arguments: {e}")
                    result["diagnostics"].append("Possible cause: Model generated malformed JSON arguments")
                    result["diagnostics"].append("Suggestion: Try a model with better JSON output, like anthropic/claude-3.5-sonnet:beta")
                    return result
            else:
                result["diagnostics"].append("Unexpected tool call name")
                return result
        # Step 3: Test feedback loop — send tool output back as 'tool' messages.
        messages.append({
            "role": "assistant",
            "content": assistant_content,
            "tool_calls": tool_calls
        })
        for tool_result in tool_results:
            messages.append({
                "role": "tool",
                "tool_call_id": tool_result["call_id"],
                "content": tool_result["content"]
            })
        tool_calls, assistant_content, error = self.send_test_request(messages, tools, model, tool_choice="none")
        if error:
            result["diagnostics"].append(f"Feedback loop failed: {error}")
            if "404" in error.lower() or "no endpoints found" in error.lower():
                result["diagnostics"].append("Possible cause: Provider does not support tool output feedback for this model")
            result["diagnostics"].append("Suggestion: Try models like anthropic/claude-3.5-sonnet:beta or contact OpenRouter support")
            self.supports_tool_feedback = False
        elif assistant_content:
            result["feedback_loop_success"] = True
            result["diagnostics"].append(f"Feedback loop succeeded: Model responded with '{assistant_content}'")
        else:
            result["diagnostics"].append("Feedback loop failed: No response to tool output")
            result["diagnostics"].append("Possible causes:")
            result["diagnostics"].append("- Provider may not process 'tool' role messages correctly")
            result["diagnostics"].append("- Model may not handle tool output feedback")
            result["diagnostics"].append("Suggestion: Try models like anthropic/claude-3.5-sonnet:beta")
            self.supports_tool_feedback = False
        return result

    def run(self):
        """Interactive tool to test tool calling."""
        print("🔍 Tool Calling Tester")
        print(f"API Base URL: {self.base_url}")
        # Mask the key, keeping only the last four characters visible.
        print(f"API Key: {'*' * len(self.api_key[:-4]) + self.api_key[-4:]}")
        print("Fetching available models with tool-calling support...")
        print(f"Available tool-calling models: {self.available_models}")
        print("Enter model name (e.g., openai/gpt-4o, anthropic/claude-3.5-sonnet:beta, press Enter for default):")
        model = input().strip() or "anthropic/claude-3.5-sonnet:beta"
        print(f"\nTesting tool calling with model: {model}")
        result = self.test_tool_calling(model)
        print("\n📊 Test Results:")
        print(f"Model Used: {result['model_used']}")
        print(f"Tool Call Success: {'✅' if result['tool_call_success'] else '❌'}")
        print(f"Feedback Loop Success: {'✅' if result['feedback_loop_success'] else '❌'}")
        print("\nDiagnostics:")
        for diag in result["diagnostics"]:
            print(f"- {diag}")
        if not result["tool_call_success"] or not result["feedback_loop_success"]:
            print("\n⚠️ Tool calling issues detected.")
            print("Suggestions:")
            print("- Ensure the model supports tool calling: https://openrouter.ai/models?supported_parameters=tools")
            print("- Use a known compatible model like anthropic/claude-3.5-sonnet:beta or mistralai/mistral-large-2411")
            print("- Check OpenRouter documentation: https://openrouter.ai/docs/features/tool-calling")
            print("- Contact OpenRouter support via Discord: https://openrouter.ai/docs/faq")
            print("- Consider using OpenAI's API directly: https://api.openai.com/v1")
            print("- Verify API key credits and permissions at https://openrouter.ai/keys")
if __name__ == "__main__":
print("πŸ”§ Tool Calling Tester Setup")
print("Enter API Base URL (default: https://openrouter.ai/api/v1):")
base_url = input().strip() or "https://openrouter.ai/api/v1"
print("Enter API Key:")
api_key = input().strip()
if not api_key:
print("❌ API Key is required")
exit(1)
tester = ToolCallingTester(api_key, base_url)
tester.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment