|
import { Hono } from 'hono' |
|
import { logger } from 'hono/logger' |
|
|
|
/* ------------------------------------------------- |
|
* Utility / Type definitions |
|
* ------------------------------------------------- */ |
|
|
|
// Model identifier forwarded verbatim to Workers AI (e.g. '@cf/meta/llama-...').
type ModelName = string

// A tool call as emitted by Workers AI. All fields may be absent on
// incremental streaming chunks.
interface WorkersToolCall {
  id?: string
  type?: string
  function?: { name?: string; arguments?: string }
}

// One streamed SSE payload from Workers AI. Text may arrive either as
// `response` or as `p` (partial) depending on the model.
interface WorkersAIChunk {
  response?: string
  p?: string // partial
  tool_calls?: WorkersToolCall[]
}

// Token accounting optionally attached to non-streaming responses.
interface WorkersAIUsage {
  prompt_tokens?: number
  completion_tokens?: number
  total_tokens?: number
}

// Full (non-streaming) Workers AI response: a chunk plus optional usage stats.
interface WorkersAIResponse extends WorkersAIChunk {
  usage?: WorkersAIUsage
}
|
|
|
// Shared codecs for converting SSE byte chunks to strings and back.
const encoder = new TextEncoder()
const decoder = new TextDecoder()

// SSE framing: messages end with a blank line and carry a 'data: ' prefix.
const CRLF2 = '\n\n'
const DATA_PREFIX = 'data: '
|
|
|
const randomId = (prefix = 'chatcmpl') => |
|
`${prefix}-${crypto.randomUUID()}` |
|
|
|
/* ------------------------------------------------- |
|
* Common: OpenAI compatible object generation |
|
* ------------------------------------------------- */ |
|
const mkOpenAIChunk = ( |
|
delta: Record<string, unknown>, |
|
model: ModelName, |
|
finish_reason: string | null = null |
|
) => ({ |
|
id: randomId(), |
|
object: 'chat.completion.chunk', |
|
created: Math.floor(Date.now() / 1000), |
|
model, |
|
choices: [ |
|
{ index: 0, delta, finish_reason } |
|
] |
|
}) |
|
|
|
/* ------------------------------------------------- |
|
* Common: SSE send helper |
|
* ------------------------------------------------- */ |
|
const enqueueJSON = ( |
|
ctrl: ReadableStreamDefaultController, |
|
obj: unknown |
|
) => ctrl.enqueue(encoder.encode(`${DATA_PREFIX}${JSON.stringify(obj)}${CRLF2}`)) |
|
|
|
const enqueueDone = (ctrl: ReadableStreamDefaultController) => |
|
ctrl.enqueue(encoder.encode(`${DATA_PREFIX}[DONE]${CRLF2}`)) |
|
|
|
/* ------------------------------------------------- |
|
* Streaming response conversion logic |
|
* ------------------------------------------------- */ |
|
const streamWorkersAI2OpenAI = ( |
|
reader: ReadableStreamDefaultReader, |
|
model: ModelName |
|
) => |
|
new ReadableStream({ |
|
async start(controller) { |
|
let buf = '' |
|
let currentToolCall: WorkersToolCall | null = null |
|
|
|
try { |
|
while (true) { |
|
const { done, value } = await reader.read() |
|
if (done) break |
|
|
|
buf += decoder.decode(value as Uint8Array) |
|
const messages = buf.split(CRLF2) |
|
buf = messages.pop() || '' |
|
|
|
for (const raw of messages) { |
|
if (!raw.startsWith(DATA_PREFIX)) continue |
|
const dataStr = raw.slice(DATA_PREFIX.length) |
|
if (dataStr === '[DONE]') continue |
|
|
|
let chunk: WorkersAIChunk |
|
try { |
|
chunk = JSON.parse(dataStr) |
|
} catch { |
|
console.log('JSON parse error:', dataStr) |
|
continue |
|
} |
|
|
|
/* ----- tool_calls ----- */ |
|
if (chunk.tool_calls?.length) { |
|
const tool = chunk.tool_calls[0] |
|
// accumulate |
|
currentToolCall ||= { |
|
id: tool.id ?? randomId('chatcmpl-tool'), |
|
type: tool.type ?? 'function', |
|
function: { name: '', arguments: '' } |
|
} |
|
if (tool.function?.arguments) |
|
currentToolCall.function!.arguments += tool.function.arguments |
|
if (tool.function?.name) |
|
currentToolCall.function!.name = tool.function.name |
|
|
|
enqueueJSON( |
|
controller, |
|
mkOpenAIChunk( |
|
{ |
|
tool_calls: [ |
|
{ |
|
index: 0, |
|
...currentToolCall |
|
} |
|
] |
|
}, |
|
model |
|
) |
|
) |
|
continue |
|
} |
|
|
|
/* ----- content ----- */ |
|
const content = chunk.response ?? chunk.p ?? '' |
|
if (content) { |
|
enqueueJSON( |
|
controller, |
|
mkOpenAIChunk({ content }, model) |
|
) |
|
} |
|
} |
|
} |
|
|
|
/* ---- final chunk ---- */ |
|
enqueueJSON( |
|
controller, |
|
mkOpenAIChunk({}, model, currentToolCall ? 'tool_calls' : 'stop') |
|
) |
|
} finally { |
|
reader.releaseLock() |
|
} |
|
enqueueDone(controller) |
|
controller.close() |
|
} |
|
}) |
|
|
|
/* ------------------------------------------------- |
|
* Non-streaming response conversion |
|
* ------------------------------------------------- */ |
|
const mapWorkersAI2OpenAI = (r: WorkersAIResponse, model: ModelName) => { |
|
const usage = { |
|
prompt_tokens: r.usage?.prompt_tokens ?? 0, |
|
completion_tokens: r.usage?.completion_tokens ?? 0, |
|
total_tokens: r.usage?.total_tokens ?? 0 |
|
} |
|
const base = { |
|
id: randomId(), |
|
object: 'chat.completion', |
|
created: Math.floor(Date.now() / 1000), |
|
model, |
|
usage, |
|
service_tier: 'default', |
|
system_fingerprint: `fp_${crypto.randomUUID().slice(0, 10)}` |
|
} |
|
|
|
if (r.tool_calls?.length) { |
|
return { |
|
...base, |
|
choices: [ |
|
{ |
|
index: 0, |
|
message: { |
|
role: 'assistant', |
|
content: null, |
|
tool_calls: r.tool_calls, |
|
refusal: null, |
|
annotations: [] |
|
}, |
|
finish_reason: 'tool_calls', |
|
logprobs: null |
|
} |
|
] |
|
} |
|
} |
|
|
|
const content = r.response ?? r.p ?? '' |
|
return { |
|
...base, |
|
choices: [ |
|
{ |
|
index: 0, |
|
message: { role: 'assistant', content, refusal: null, annotations: [] }, |
|
finish_reason: 'stop', |
|
logprobs: null |
|
} |
|
] |
|
} |
|
} |
|
|
|
/* ================================================= |
|
* Hono app body |
|
* ================================================= */ |
|
// Hono app mounted under /workersai. `AI` is the Workers AI binding from the
// wrangler config (typed `any` here; the real `Ai` type lives in
// @cloudflare/workers-types, which this file does not import).
const app = new Hono<{ Bindings: { AI: any } }>()
  .basePath('/workersai')

// Request logging for every route.
app.use('*', logger())
|
|
|
/* -------- OpenAI compatible chat endpoint -------- */ |
|
app.post('/v1/chat/completions', async c => { |
|
const body = await c.req.json() |
|
//console.log('POST /v1/chat/completions body:', body) |
|
|
|
const aiParams: any = { |
|
messages: body.messages, |
|
stream: body.stream ?? true |
|
} |
|
if (Array.isArray(body.tools) && body.tools.length) aiParams.tools = body.tools |
|
|
|
const workersRes = await c.env.AI.run(body.model, aiParams) |
|
|
|
/* ----- stream ----- */ |
|
if (workersRes instanceof ReadableStream) { |
|
c.header('Content-Type', 'text/event-stream') |
|
const reader = workersRes.getReader() |
|
return new Response(streamWorkersAI2OpenAI(reader, body.model)) |
|
} |
|
|
|
/* ----- non-stream ----- */ |
|
return c.json( |
|
mapWorkersAI2OpenAI(workersRes, body.model), |
|
200, |
|
{ 'Content-Type': 'application/json' } |
|
) |
|
}) |
|
|
|
// Cursor compatibility endpoints |
|
app.get('/v1/models', c => |
|
c.json({ models: [{ name: 'my-custom-model', model: 'gpt-4o' }] })) |
|
|
|
// Ollama API compatibility endpoints |
|
app.get('/api/tags', c => |
|
c.json({ models: [{ name: 'llama-4-scout-17b-16e-instruct', model: '@cf/meta/llama-4-scout-17b-16e-instruct' }] })) |
|
|
|
// Ollama API compatibility: model metadata for /api/show
|
app.post('/api/show', c => |
|
c.json({ |
|
model_info: { 'general.basename': 'llama-4-scout-17b-16e-instruct' }, |
|
capabilities: ['completion', 'tools'] |
|
})) |
|
|
|
export default app |