|
/** |
|
* OpenRouter API configuration and client for LLM tasks |
|
*/ |
|
import { z, ZodType } from 'zod'; |
|
import { zodToJsonSchema } from 'zod-to-json-schema'; |
|
export const OPENROUTER_API_KEY = process.env.OPENROUTER_API_KEY; |
|
export const HELICONE_API_KEY = process.env.HELICONE_API_KEY; |
|
|
|
// Define the base URLs |
|
const OPENROUTER_DEFAULT_URL = 'https://openrouter.ai/api/v1'; |
|
const OPENROUTER_HELICONE_URL = 'https://openrouter.helicone.ai/api/v1'; |
|
|
|
// Select the URL based on whether Helicone API key is provided |
|
export const OPENROUTER_BASE_URL = HELICONE_API_KEY |
|
? OPENROUTER_HELICONE_URL // Use Helicone if key exists |
|
: OPENROUTER_DEFAULT_URL; // Otherwise, use default OpenRouter URL |
|
|
|
console.log(`Using OpenRouter base URL: ${OPENROUTER_BASE_URL}`); |
|
|
|
export const OPENROUTER_DEFAULT_HEADERS = { |
|
// NOTE: HTTP-Referer and X-Title make your app public on operouter stats! |
|
Authorization: `Bearer ${OPENROUTER_API_KEY}`, |
|
'Helicone-Auth': `Bearer ${HELICONE_API_KEY}`, |
|
'Content-Type': 'application/json', |
|
}; |
|
|
|
/**
 * Interface for storing detailed information about each supported LLM model.
 * Entries live in the `Models` table below; `defaultRoutingOptions` supplies
 * per-model routing defaults that callers can override via `RoutingOptions`.
 */
export interface ModelInfo {
  id: string; // OpenRouter model identifier (e.g. 'openai/gpt-4.1')
  supportsStructuredOutput: boolean; // Whether the model is verified to support structured output (json_schema)
  defaultRoutingOptions?: {
    // Nested object for default routing; each field is the fallback used by
    // _buildRoutingOptions when the caller's RoutingOptions leaves it undefined.
    providerSort?: 'price' | 'throughput' | 'latency'; // How OpenRouter should rank candidate providers
    providerOrder?: string[]; // See: https://openrouter.ai/docs/features/provider-routing#json-schema-for-provider-preferences
    ignoredProviders?: string[]; // Providers to exclude from routing
    providerOnly?: string[]; // Restrict to specific providers only
    allowFallbacks?: boolean; // Whether to allow fallbacks to other models
    reasoning?: ReasoningOptions; // Reasoning configuration
  };
}
|
|
|
/**
 * Reasoning options for controlling model's reasoning effort.
 * Based on OpenRouter API documentation.
 * Supply either `effort` OR `max_tokens`, not both.
 */
export interface ReasoningOptions {
  // One of the following (not both):
  effort?: 'low' | 'medium' | 'high'; // OpenAI-style effort control
  max_tokens?: number; // Anthropic-style specific token limit

  // Optional settings:
  exclude?: boolean; // Set to true to exclude reasoning tokens from response (default: false)
  enabled?: boolean; // Enable reasoning with default parameters (default: inferred from effort or max_tokens)
}
|
|
|
/**
 * Optional routing preferences that can override the defaults set in ModelInfo.
 * Any field left undefined falls back to the model's `defaultRoutingOptions`
 * (resolution happens in _buildRoutingOptions).
 */
export interface RoutingOptions {
  providerSort?: 'price' | 'throughput' | 'latency'; // How OpenRouter should rank candidate providers
  providerOrder?: string[]; // Explicit provider priority list
  ignoredProviders?: string[]; // Providers to exclude from routing
  providerOnly?: string[]; // Restrict to specific providers only
  allowFallbacks?: boolean; // Whether to allow fallbacks to other models
  noRoutingOptions?: boolean; // When true, ALL provider prefs and fallback models are dropped for the request
  reasoning?: ReasoningOptions; // Reasoning configuration
}
|
|
|
/**
 * OpenRouter model identifiers that support structured outputs.
 * All models with `supportsStructuredOutput: true` have been verified to
 * support the structured_outputs parameter; entries marked false are routed
 * through plain JSON mode by makeLLMCall instead.
 */
export const Models: Record<string, ModelInfo> = {
  // OpenAI models
  GPT_oss_120B: {
    id: 'openai/gpt-oss-120b',
    supportsStructuredOutput: true,
    defaultRoutingOptions: {
      providerOrder: ['together', 'parasail/fp4', 'groq'],
      ignoredProviders: ['deepinfra/fp4', 'fireworks'],
      reasoning: {
        effort: 'medium'
      },
    },
  },
  GPT5Mini: {
    id: 'openai/gpt-5-mini',
    supportsStructuredOutput: true,
  },
  GPT4o: {
    id: 'openai/chatgpt-4o-latest',
    supportsStructuredOutput: true,
  },
  GPT4_1: {
    id: 'openai/gpt-4.1',
    supportsStructuredOutput: true,
  },
  GPT4_1Mini: {
    id: 'openai/gpt-4.1-mini',
    supportsStructuredOutput: true,
  },
  GPT4_1Nano: {
    id: 'openai/gpt-4.1-nano',
    supportsStructuredOutput: true,
  },
  o4Mini: {
    id: 'openai/o4-mini',
    supportsStructuredOutput: true,
  },
  o3Mini: {
    id: 'openai/o3-mini',
    supportsStructuredOutput: true,
  },

  // Google models
  Gemini25Pro: {
    id: 'google/gemini-2.5-pro',
    supportsStructuredOutput: true,
  },
  Gemini25Flash: {
    id: 'google/gemini-2.5-flash',
    supportsStructuredOutput: true,
  },
  Gemini25FlashLite: {
    id: 'google/gemini-2.5-flash-lite',
    supportsStructuredOutput: true,
  },
  Gemma3_27b_IT: {
    id: 'google/gemma-3-27b-it',
    supportsStructuredOutput: true,
  },

  // Deepseek models
  DeepseekR1: {
    id: 'deepseek/deepseek-r1-0528',
    supportsStructuredOutput: false,
    defaultRoutingOptions: {
      providerSort: 'price',
      providerOrder: ['lambda/fp8', 'deepinfra/fp4', 'baseten/fp8'],
    },
  },
  DeepseekV3: {
    id: 'deepseek/deepseek-chat-v3-0324',
    supportsStructuredOutput: false,
    defaultRoutingOptions: {
      providerSort: 'price',
      ignoredProviders: ['inference-net'],
    },
  },

  // Qwen models
  Qwen3235bA22b: {
    id: 'qwen/qwen3-235b-a22b',
    supportsStructuredOutput: false,
    defaultRoutingOptions: {
      // Ordered by preference; fallbacks disabled so only these providers are used.
      providerOrder: [
        'nebius/fp8',
        'together/fp8',
        'cerebras', // EXPENSIVE!
        'parasail/fp8',
        'fireworks',
        'deepinfra/fp8',
      ],
      allowFallbacks: false,
    },
  },
  Qwen3235bA22b_2507: {
    id: 'qwen/qwen3-235b-a22b-thinking-2507',
    supportsStructuredOutput: false,
    defaultRoutingOptions: {
      providerSort: 'price',
      allowFallbacks: true,
    },
  },

  // xAI Grok
  Grok3Mini: {
    id: 'x-ai/grok-3-mini',
    supportsStructuredOutput: true,
    defaultRoutingOptions: {
      reasoning: {
        effort: 'medium',
        exclude: true, // Reason internally but omit reasoning tokens from the response
      },
    },
  },

  // Moonshot Kimi
  Kimi_K2: {
    id: 'moonshotai/kimi-k2',
    supportsStructuredOutput: false,
  },

  // Llama models
  Llama4Maverick: {
    id: 'meta-llama/llama-4-maverick',
    supportsStructuredOutput: false,
  },

  // Anthropic models
  Claude: {
    id: 'anthropic/claude-3.5-sonnet',
    supportsStructuredOutput: true,
  },
};
|
|
|
/** |
|
* Builds the provider preferences and fallback model list for the OpenRouter request. |
|
* Allows overriding default model routing options. |
|
*/ |
|
function _buildRoutingOptions( |
|
primaryModel: ModelInfo, |
|
backupModels: ModelInfo[], |
|
routingOptions?: RoutingOptions // Add optional override parameter |
|
): { |
|
provider: Record<string, any>; |
|
models: string[]; |
|
reasoning?: ReasoningOptions; |
|
} { |
|
// Check for noRoutingOptions flag to short-circuit at the very top |
|
if (routingOptions?.noRoutingOptions) { |
|
return { |
|
provider: {}, // Return an empty provider object |
|
models: [], // Return an empty array for models, ignoring any backups |
|
reasoning: undefined, |
|
}; |
|
} |
|
|
|
// Build provider preferences object, always requiring parameter support for structured output |
|
const providerPrefs: Record<string, any> = { |
|
require_parameters: true, |
|
}; |
|
|
|
// Apply overrides first, then model defaults if no override exists |
|
const finalSort = routingOptions?.providerSort ?? primaryModel.defaultRoutingOptions?.providerSort; |
|
const finalOrder = routingOptions?.providerOrder ?? primaryModel.defaultRoutingOptions?.providerOrder; |
|
const finalIgnore = routingOptions?.ignoredProviders ?? primaryModel.defaultRoutingOptions?.ignoredProviders; |
|
const finalProviderOnly = routingOptions?.providerOnly ?? primaryModel.defaultRoutingOptions?.providerOnly; |
|
const finalAllowFallbacks = routingOptions?.allowFallbacks ?? primaryModel.defaultRoutingOptions?.allowFallbacks; |
|
const finalReasoning = routingOptions?.reasoning ?? primaryModel.defaultRoutingOptions?.reasoning; |
|
|
|
// HACK: for some reason, openai models don't respect require_parameters |
|
if (primaryModel.id.startsWith('openai/')) { |
|
providerPrefs.require_parameters = false; |
|
} |
|
|
|
if (finalSort) { |
|
providerPrefs.sort = finalSort; |
|
} |
|
if (finalOrder && finalOrder.length > 0) { |
|
providerPrefs.order = finalOrder; |
|
} |
|
if (finalIgnore && finalIgnore.length > 0) { |
|
providerPrefs.ignore = finalIgnore; |
|
} |
|
if (finalProviderOnly && finalProviderOnly.length > 0) { |
|
providerPrefs.only = finalProviderOnly; |
|
} |
|
if (finalAllowFallbacks !== undefined) { |
|
providerPrefs.allow_fallbacks = finalAllowFallbacks; |
|
} |
|
|
|
// Prepare fallback model IDs (always an array, empty if no backups) |
|
const fallbackModelIds = backupModels && backupModels.length > 0 ? backupModels.map(model => model.id) : []; |
|
|
|
return { provider: providerPrefs, models: fallbackModelIds, reasoning: finalReasoning }; |
|
} |
|
|
|
/** |
|
* Appends Helicone custom property headers to an existing headers object. |
|
* Mutates the passed headers object. |
|
*/ |
|
function _appendHeliconePropertyHeaders( |
|
headers: Record<string, string>, |
|
metadata: Record<string, any> // Accept metadata object |
|
): void { |
|
for (const [key, value] of Object.entries(metadata)) { |
|
// Handle the special userId case first since helicone segments usage by User-Id |
|
if (key === 'userId') { |
|
const userIdValue = value; |
|
headers['Helicone-User-Id'] = userIdValue; |
|
} |
|
|
|
// Generic property handling |
|
let stringValue: string | undefined; |
|
if (value === null || typeof value === 'undefined') { |
|
stringValue = undefined; // Skip null/undefined values |
|
} else if (typeof value === 'string') { |
|
stringValue = value; |
|
} else { |
|
stringValue = JSON.stringify(value); |
|
} |
|
|
|
if (stringValue) { |
|
// Only add if value is not empty/null/undefined |
|
headers[`Helicone-Property-${key}`] = stringValue; |
|
} |
|
} |
|
} |
|
|
|
/**
 * Helper function to make the actual API call to OpenRouter.
 * Handles fetch request, headers, basic response validation, and extracts content string.
 *
 * @param requestBody - Fully-built chat-completions payload (model, messages, etc.);
 *                      serialized as-is with JSON.stringify
 * @param metadata - Optional key/value pairs forwarded as Helicone property headers
 *                   (only applied when HELICONE_API_KEY is configured)
 * @returns The content string of the first choice's message
 * @throws If OPENROUTER_API_KEY is missing, the HTTP request fails, the response
 *         is non-OK, or a 200 response contains no usable content
 */
async function makeOpenrouterCall(requestBody: any, metadata?: Record<string, any>): Promise<string> {
  if (!OPENROUTER_API_KEY) {
    throw new Error('OPENROUTER_API_KEY is not set in environment variables');
  }

  // Set the headers (copy so we never mutate the shared default headers object)
  const headers = { ...OPENROUTER_DEFAULT_HEADERS };

  // Append Helicone properties from metadata if applicable
  if (HELICONE_API_KEY && metadata) {
    // Used for tracing user specific LLM requests
    _appendHeliconePropertyHeaders(headers, metadata);
  }

  try {
    const response = await fetch(`${OPENROUTER_BASE_URL}/chat/completions`, {
      method: 'POST',
      headers: headers,
      body: JSON.stringify(requestBody),
    });

    // Handle successful response first
    if (response.ok) {
      const data = await response.json();

      // console.debug('OpenRouter Response:', JSON.stringify(data, null, 2));
      // Also log the message object specifically to see its contents
      // if (data.choices?.[0]?.message) {
      //   console.debug('Message object:', JSON.stringify(data.choices[0].message, null, 2));
      // }

      // Check for valid content in the successful response: a 200 can still
      // carry an empty choices array or a message without content.
      if (!data.choices || data.choices.length === 0 || !data.choices[0].message?.content) {
        const message = data.choices?.[0]?.message;
        console.warn('OpenRouter Warning: Response OK but no content generated.', data);

        // Print the actual reason/refusal/reasoning if available
        if (message?.refusal) {
          console.error('Model REFUSAL reason:', message.refusal);
        }
        if (message?.reasoning) {
          console.log('Model REASONING:', message.reasoning);
        }

        throw new Error(
          `OpenRouter response was successful (200 OK) but contained no valid choices or content. ${
            message?.refusal ? `Model refused: ${message.refusal}` :
            'This might be due to model warm-up, scaling, or content filtering.'
          } Consider retrying, adjusting prompts, or using a different model/provider.`
        );
      }
      // Success: return the content string directly
      return data.choices[0].message.content;
    }

    // --- Error Handling for non-OK responses ---
    // Prefer OpenRouter's structured error body; fall back to raw text.
    // https://openrouter.ai/docs/api-reference/errors
    let errorCode: number | string = response.status;
    let errorMessage = response.statusText;
    let errorDetails: any = null;

    try {
      const errorJson = await response.json();
      if (errorJson && errorJson.error) {
        // Standard OpenRouter error shape: { error: { code, message, metadata } }
        errorCode = errorJson.error.code || errorCode;
        errorMessage = errorJson.error.message || errorMessage;
        errorDetails = { code: errorCode, message: errorMessage, metadata: errorJson.error.metadata };
        console.warn('OpenRouter API Structured Error:', errorDetails);
      } else {
        errorDetails = errorJson;
        console.warn(`OpenRouter API Error (${response.status}): Non-standard JSON response`, errorDetails);
      }
    } catch (jsonError) {
      // Body wasn't JSON; try to capture it as plain text for diagnostics.
      try {
        errorDetails = await response.text();
      } catch (textError) {
        errorDetails = '<Could not read error body>';
      }
      console.warn(`OpenRouter API Error (${response.status}): Raw text response`, errorDetails);
    }

    const finalErrorMessage = `OpenRouter API request failed (${errorCode}): ${errorMessage}`;
    // console.debug(`Openrouter request body that failed: ${JSON.stringify(requestBody, null, 2)}`);
    throw new Error(finalErrorMessage);
  } catch (error) {
    // Catch fetch errors or errors thrown from response handling
    console.error('Error during _makeOpenrouterCall execution:', error);
    if (error instanceof Error) {
      throw error; // Re-throw the original error
    } else {
      throw new Error('An unknown error occurred during the OpenRouter API call process.');
    }
  }
}
|
|
|
/**
 * Makes an LLM call to OpenRouter using fetch, supporting structured outputs (JSON Schema)
 * and model fallbacks. Allows specifying provider sorting and ordering preferences.
 *
 * @param systemPrompt - System message content (required here; defaulting happens in makeLLMCall)
 * @param userPrompt - User message content
 * @param primaryModel - Model whose id is placed in the request body
 * @param backupModels - Forwarded to OpenRouter as the `models` fallback list
 * @param schema - Zod schema; converted to JSON Schema for the request and used to validate the reply
 * @param schemaName - Name reported in the json_schema response_format block
 * @param routingOptions - Per-call routing overrides (see RoutingOptions)
 * @param metadata - Helicone tracing properties; `allowEmpty: false` additionally rejects `{}` replies
 * @param temperature - Optional sampling temperature
 * @returns The parsed response, typed by the schema
 * @throws On API failure, unparsable content, empty object (when disallowed), or Zod validation failure
 */
export async function _callInStructuredOutputMode<T extends ZodType>({
  systemPrompt,
  userPrompt,
  primaryModel,
  backupModels = [],
  schema,
  schemaName = 'response',
  routingOptions,
  metadata,
  temperature,
}: {
  systemPrompt: string;
  userPrompt: string;
  primaryModel: ModelInfo;
  backupModels?: ModelInfo[];
  schema: T;
  schemaName?: string;
  routingOptions?: RoutingOptions;
  metadata?: Record<string, any>;
  temperature?: number;
}): Promise<z.infer<T>> {
  const messages = [
    { role: 'system' as const, content: systemPrompt },
    { role: 'user' as const, content: userPrompt },
  ];

  // Generate the JSON schema object using zod-to-json-schema default strategy
  // The default behavior should set additionalProperties: false for object schemas
  const finalJsonSchema = zodToJsonSchema(schema as any);

  // Get provider preferences and fallback models using the helper function
  const { provider: providerPrefs, models: fallbackModelIds, reasoning: modelReasoning } = _buildRoutingOptions(
    primaryModel,
    backupModels,
    routingOptions
  );

  // OpenRouter request body
  const requestBody: any = {
    model: primaryModel.id,
    messages: messages,
    response_format: {
      type: 'json_schema',
      json_schema: {
        name: schemaName,
        strict: true, // OpenRouter specific strict flag
        schema: finalJsonSchema, // Pass the generated (and potentially modified) JSON schema
      },
    },
    provider: providerPrefs, // Assign the provider object,
  };

  // Add reasoning parameter from model configuration
  if (modelReasoning) {
    requestBody.reasoning = modelReasoning;
  }

  // Add temperature if provided
  if (temperature !== undefined) {
    requestBody.temperature = temperature;
  }

  // Only include models field for backup models, not for primary model
  if (backupModels && backupModels.length > 0) {
    requestBody.models = fallbackModelIds;
  }

  // Make the OpenRouter API call using the helper function
  try {
    // Pass metadata directly to makeOpenrouterCall
    const contentString = await makeOpenrouterCall(requestBody, metadata);

    // Parse the raw content; parse errors are reported with the offending string.
    let parsedContent: any;
    try {
      parsedContent = JSON.parse(contentString);
    } catch (parseError) {
      console.error(
        'Failed to parse JSON content string from OpenRouter response:',
        contentString,
        parseError
      );
      throw new Error('Failed to parse structured output from LLM response.');
    }

    // Check for empty object if allowEmpty is false
    if (metadata?.allowEmpty === false &&
      parsedContent &&
      typeof parsedContent === 'object' &&
      Object.keys(parsedContent).length === 0) {
      console.error('LLM returned empty object when allowEmpty is false');
      throw new Error('LLM response returned empty object when non-empty response was required');
    }

    // Validate the parsed content against the original Zod schema
    const validationResult = schema.safeParse(parsedContent);
    if (!validationResult.success) {
      console.error('Zod validation failed for OpenRouter response:', validationResult.error.format());
      console.error(
        `Raw content: ${contentString} and parsed content: ${JSON.stringify(parsedContent, null, 2)}`
      );
      throw new Error(`LLM response failed Zod validation: ${validationResult.error.message}`);
    }
    return validationResult.data;
  } catch (error) {
    // The error from _makeOpenrouterCall is caught here
    console.warn('Error processing OpenRouter response in makeStructuredLLMCall:', error);
    // Optional: Log the request body that led to the failure if needed for debugging
    // console.debug(`Openrouter request body that failed: ${JSON.stringify(requestBody, null, 2)}`);
    if (error instanceof Error) {
      throw error; // Re-throw the original error
    } else {
      throw new Error('An unknown error occurred processing the LLM API response.');
    }
  }
}
|
|
|
|
|
// Cleans a JSON response by finding the first { and last } and stripping everything before and after. |
|
export function cleanJsonResponse(raw: string): string { |
|
return raw |
|
.replace(/^[^\[{]*/, '') // keep from first { or [ |
|
.replace(/[^}\]]*$/, '') // keep through last } or ] |
|
.trim(); |
|
} |
|
|
|
/** |
|
* Makes an LLM call to OpenRouter requesting raw JSON output (using json_object mode). |
|
* This does not enforce a specific schema but ensures the output is valid JSON. |
|
*/ |
|
export async function _callInJSONMode({ |
|
systemPrompt, |
|
userPrompt, |
|
primaryModel, |
|
backupModels = [], |
|
routingOptions, |
|
metadata, |
|
temperature, |
|
}: { |
|
systemPrompt?: string; |
|
userPrompt: string; |
|
primaryModel: ModelInfo; |
|
backupModels?: ModelInfo[]; |
|
routingOptions?: RoutingOptions; |
|
metadata?: Record<string, any>; |
|
temperature?: number; |
|
}): Promise<any> { |
|
// Return type is any as we don't validate against a specific schema |
|
|
|
// Standard system prompt for JSON mode |
|
const actualSystemPrompt = |
|
systemPrompt || |
|
'You are an AI assistant. Your task is to respond STRICTLY with valid JSON format. Do NOT include any explanations, introductory text, or markdown code fences (like ```json). Only output the raw JSON object.'; |
|
|
|
const messages = [ |
|
{ role: 'system' as const, content: actualSystemPrompt }, |
|
{ role: 'user' as const, content: userPrompt }, |
|
]; |
|
|
|
// Get provider preferences and fallback models |
|
const { provider: providerPrefs, models: fallbackModelIds, reasoning: modelReasoning } = _buildRoutingOptions( |
|
primaryModel, |
|
backupModels, |
|
routingOptions |
|
); |
|
|
|
// Build the request body for JSON mode |
|
const requestBody: any = { |
|
model: primaryModel.id, |
|
messages: messages, |
|
response_format: { type: 'json_object' }, // Use JSON mode |
|
provider: providerPrefs, |
|
}; |
|
|
|
// Add reasoning parameter from model configuration |
|
if (modelReasoning) { |
|
requestBody.reasoning = modelReasoning; |
|
} |
|
|
|
// Add temperature if provided |
|
if (temperature !== undefined) { |
|
requestBody.temperature = temperature; |
|
} |
|
|
|
// Make the OpenRouter API call using the helper function |
|
try { |
|
// Pass metadata directly to makeOpenrouterCall |
|
const rawContentString = await makeOpenrouterCall(requestBody, metadata); |
|
|
|
// Clean the string surgically by finding first { and last } |
|
const cleanedContent = cleanJsonResponse(rawContentString); |
|
|
|
try { |
|
// Attempt to parse the cleaned string |
|
const parsedJson = JSON.parse(cleanedContent); |
|
|
|
// Check for empty object if allowEmpty is false |
|
if (metadata?.allowEmpty === false && |
|
parsedJson && |
|
typeof parsedJson === 'object' && |
|
Object.keys(parsedJson).length === 0) { |
|
console.error('LLM returned empty object when allowEmpty is false'); |
|
throw new Error('LLM response returned empty object when non-empty response was required'); |
|
} |
|
|
|
return parsedJson; |
|
} catch (parseError) { |
|
console.error('Failed to parse JSON content string from OpenRouter JSON mode response:', parseError); |
|
console.error('Cleaned Content that failed parsing:', cleanedContent); |
|
console.error('Raw Content received:', rawContentString); |
|
throw new Error('Failed to parse JSON output from LLM response even after cleaning.'); |
|
} |
|
} catch (error) { |
|
// Catch errors from makeOpenrouterCall or the parsing block |
|
console.error('Error during makeJSONLLMCall:', error); |
|
// Log the request body that might have caused the failure (excluding potentially sensitive prompts if needed) |
|
// console.debug(`Openrouter request body that failed in makeJSONLLMCall: ${JSON.stringify({...requestBody, messages: '[MESSAGES OMITTED]'}, null, 2)}`); |
|
if (error instanceof Error) { |
|
throw error; // Re-throw the original error |
|
} else { |
|
throw new Error('An unknown error occurred during the JSON LLM API call.'); |
|
} |
|
} |
|
} |
|
|
|
/**
 * Unified LLM call function that intelligently chooses between structured output
 * (if model supports it and schema is provided) and basic JSON mode.
 *
 * Fallback ladder when structured output is attempted:
 *   (1) structured output on the primary model (no fallbacks, no backups);
 *   (2) on failure, JSON mode on the primary model (with a schema hint);
 *   (3) on failure, structured output on the first backup model;
 *   (4) on failure, JSON mode on that backup (remaining backups forwarded).
 * When structured output is not attempted at all (forceJsonMode, unsupported
 * model, or no schema), JSON mode is called directly, then each backup model
 * is tried in JSON mode one at a time.
 *
 * @param systemPrompt - Optional; structured-output attempts substitute a generic JSON prompt
 * @param userPrompt - User message content (a JSON-schema hint is appended for JSON mode)
 * @param primaryModel - First model to try
 * @param backupModels - Models tried after the primary fails
 * @param schema - Optional Zod schema; without it only JSON mode is possible
 * @param schemaName - Name used in the json_schema response_format block
 * @param forceJsonMode - Skip structured output entirely when true
 * @param metadata - Helicone tracing properties (also carries allowEmpty)
 * @param temperature - Optional sampling temperature
 * @returns The parsed response (schema-typed in structured mode, otherwise any)
 */
export async function makeLLMCall<T extends ZodType>({
  systemPrompt,
  userPrompt,
  primaryModel,
  backupModels = [],
  schema,
  schemaName = 'response',
  forceJsonMode = false,
  routingOptions,
  metadata,
  temperature,
}: {
  systemPrompt?: string;
  userPrompt: string;
  primaryModel: ModelInfo;
  backupModels?: ModelInfo[];
  schema?: T;
  schemaName?: string;
  forceJsonMode?: boolean;
  routingOptions?: RoutingOptions;
  metadata?: Record<string, any>;
  temperature?: number;
}): Promise<any> {
  // Returns any due to potential fallback modes

  // (1) Try structured output mode first (if model supports it and schema is provided)
  if (primaryModel.supportsStructuredOutput && schema && !forceJsonMode) {
    try {
      // Prepare the system prompt for structured output
      const actualSystemPrompt =
        systemPrompt ||
        `You are a helpful assistant that responds in JSON format according to the provided schema.`;

      // Call the structured output mode helper function with modified routing options
      return await _callInStructuredOutputMode({
        systemPrompt: actualSystemPrompt,
        userPrompt,
        primaryModel,
        backupModels: [], // Don't pass backup models to structured output mode
        schema,
        schemaName,
        routingOptions: {
          ...routingOptions,
          allowFallbacks: false, // Don't allow fallbacks for primary model
          providerOnly: primaryModel.defaultRoutingOptions?.providerOnly, // Keep model-specific provider restrictions
        },
        metadata,
        temperature,
      });
    } catch (error) {
      console.warn(
        `Structured output call failed for model ${primaryModel.id}. Falling back to JSON mode. Error:`,
        error
      );

      // (2) Try JSON mode with the same primary model
      try {
        console.log(`Trying JSON mode with model ${primaryModel.id} after structured output failure`);

        // Add schema as a hint if provided
        let promptForJsonMode = userPrompt;
        if (schema) {
          try {
            const jsonSchemaForHint = zodToJsonSchema(schema as any, {
              removeAdditionalStrategy: 'strict',
            });
            promptForJsonMode += `\n\n--- JSON STRUCTURE HINT. FOLLOW THIS STRUCTURE EXACTLY! ---\n${JSON.stringify(jsonSchemaForHint, null, 2)}\n--- END JSON STRUCTURE HINT ---`;
            console.log(`Added schema hint to prompt for JSON object mode.`);
          } catch (schemaError) {
            console.warn('Failed to generate JSON schema for hint, proceeding without hint:', schemaError);
          }
        }

        // Call JSON mode with modified routing options
        return await _callInJSONMode({
          systemPrompt,
          userPrompt: promptForJsonMode,
          primaryModel,
          backupModels: [], // Don't pass backup models to JSON mode
          routingOptions: {
            ...routingOptions,
            allowFallbacks: true, // Allow fallbacks since nearly all providers support JSON mode
          },
          metadata,
          temperature,
        });
      } catch (jsonModeError) {
        console.warn(
          `JSON mode call failed for model ${primaryModel.id}. Falling back to backup model. Error:`,
          jsonModeError
        );

        // If JSON mode with primary model fails, try the retry ladder with backup models.
        // NOTE(review): when there are NO backup models, this catch block ends
        // without throwing, so control falls through to the "direct JSON mode"
        // path below and the primary model is retried in JSON mode once more —
        // confirm this double-retry is intended.
        if (backupModels && backupModels.length > 0) {
          const firstBackup = backupModels[0];
          const restBackups = backupModels.length > 1 ? backupModels.slice(1) : [];

          try {
            // (3) structured on first backup (if it supports structured output and schema exists)
            if (firstBackup.supportsStructuredOutput && schema) {
              console.log(`Trying structured output on backup model: ${firstBackup.id}`);

              // Prepare the system prompt for structured output
              const backupSystemPrompt =
                systemPrompt ||
                `You are a helpful assistant that responds in JSON format according to the provided schema.`;

              return await _callInStructuredOutputMode({
                systemPrompt: backupSystemPrompt,
                userPrompt,
                primaryModel: firstBackup,
                backupModels: restBackups,
                schema,
                schemaName,
                routingOptions,
                metadata,
                temperature,
              });
            } else {
              // Skip to JSON mode if structured output not supported.
              // The throw below is control flow: it jumps into the catch that
              // runs the JSON-mode attempt for this backup.
              if (!firstBackup.supportsStructuredOutput) {
                console.warn(
                  `Backup model ${firstBackup.id} does not support structured output. Going directly to JSON mode.`
                );
              } else if (!schema) {
                console.warn(
                  `Schema not provided. Going directly to JSON mode for backup model: ${firstBackup.id}.`
                );
              }
              throw new Error('Skipping structured output for backup model');
            }
          } catch (structuredBackupError) {
            console.error(
              `Structured output call failed for backup model ${firstBackup.id}. Trying JSON mode. Error:`,
              structuredBackupError
            );

            // (4) then JSON on that same backup
            // Add schema as a hint if provided
            let promptForJsonMode = userPrompt;
            if (schema) {
              try {
                const jsonSchemaForHint = zodToJsonSchema(schema as any, {
                  removeAdditionalStrategy: 'strict',
                });
                promptForJsonMode += `\n\n--- JSON STRUCTURE HINT. FOLLOW THIS STRUCTURE EXACTLY! ---\n${JSON.stringify(jsonSchemaForHint, null, 2)}\n--- END JSON STRUCTURE HINT ---`;
                console.log(`Added schema hint to prompt for JSON object mode.`);
              } catch (schemaError) {
                console.error(
                  'Failed to generate JSON schema for hint, proceeding without hint:',
                  schemaError
                );
              }
            }

            return await _callInJSONMode({
              systemPrompt,
              userPrompt: promptForJsonMode,
              primaryModel: firstBackup,
              backupModels: restBackups,
              routingOptions: { allowFallbacks: true },
              metadata,
              temperature,
            });
          }
        }
      }
    }
  }

  // Determine why JSON mode was forced and log the reason
  // (due to forceJsonMode, model support, or missing schema)
  if (forceJsonMode) {
    console.log(`forceJsonMode is true. Forcing JSON object mode for model: ${primaryModel.id}`);
  } else if (!primaryModel.supportsStructuredOutput) {
    console.warn(
      `Model ${primaryModel.id} does not support structured output. Falling back to JSON object mode.`
    );
  } else if (!schema) {
    console.warn(`Schema not provided. Falling back to JSON object mode for model: ${primaryModel.id}.`);
  }

  // If we get here, it means we're using JSON mode directly (not as a fallback)
  // Add schema as a hint if provided
  let promptForJsonMode = userPrompt;
  if (schema) {
    try {
      const jsonSchemaForHint = zodToJsonSchema(schema as any, {
        removeAdditionalStrategy: 'strict',
      });
      promptForJsonMode += `\n\n--- JSON STRUCTURE HINT --- FOLLOW THIS STRUCTURE EXACTLY:\n${JSON.stringify(jsonSchemaForHint, null, 2)}\n--- END JSON STRUCTURE HINT ---`;
      console.log(`Added schema hint to prompt for JSON object mode.`);
    } catch (schemaError) {
      console.warn('Failed to generate JSON schema for hint, proceeding without hint:', schemaError);
    }
  }

  // Call the JSON mode helper with proper error handling
  try {
    return await _callInJSONMode({
      systemPrompt,
      userPrompt: promptForJsonMode,
      primaryModel,
      backupModels: backupModels, // These models are for openrouter failures, not JSON failures
      routingOptions,
      metadata,
      temperature,
    });
  } catch (jsonModeError) {
    console.error(
      `Direct JSON mode call failed for model ${primaryModel.id}. Error:`,
      jsonModeError
    );

    // If JSON mode with primary model fails and we have backup models, try them
    if (backupModels && backupModels.length > 0) {
      console.log(`Attempting fallback to backup models after JSON mode failure`);

      // Try each backup model in JSON mode
      for (let i = 0; i < backupModels.length; i++) {
        const backupModel = backupModels[i];
        console.log(`Trying JSON mode with backup model: ${backupModel.id}`);

        try {
          return await _callInJSONMode({
            systemPrompt,
            userPrompt: promptForJsonMode,
            primaryModel: backupModel,
            backupModels: [], // No further backups for individual attempts
            routingOptions: {
              ...routingOptions,
              allowFallbacks: true,
            },
            metadata,
            temperature,
          });
        } catch (backupError) {
          console.error(
            `JSON mode failed for backup model ${backupModel.id}:`,
            backupError
          );

          // If this is the last backup model, throw the error
          if (i === backupModels.length - 1) {
            throw new Error(
              `All models failed to process the request. Last error: ${backupError instanceof Error ? backupError.message : 'Unknown error'}`
            );
          }
          // Otherwise, continue to the next backup model
        }
      }
    }

    // If no backup models or all failed, re-throw the original error
    throw jsonModeError;
  }
}