@laiso
Last active June 7, 2025 10:18
Custom AI editor tool override API with OpenAI Chat Completions compatibility

This project implements a custom code-generation tool API for AI editors (VS Code, Cursor). The main purpose is to override the Tool Use functionality of AI editors and inject custom code-generation logic, while maintaining full compatibility with the Chat Completions API and the Ollama API so that existing editor configurations can be used as-is.

The supported environments are the VS Code and Cursor editors, and the API can be deployed on Cloudflare Workers. It must support streaming=true for real-time responses. Detailed tool-call tracking and log output are provided for debugging.
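
For example, a minimal streaming client for the mock route defined below looks like this (a sketch assuming the local dev server at http://localhost:8787; the URL and model name are placeholders):

const res = await fetch('http://localhost:8787/mock/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-4o',
    stream: true,
    messages: [{ role: 'user', content: 'Hello' }]
  })
})

// Consume the SSE stream and print each content delta
const reader = res.body!.getReader()
const decoder = new TextDecoder()
let buf = ''
while (true) {
  const { done, value } = await reader.read()
  if (done) break
  buf += decoder.decode(value, { stream: true })
  const events = buf.split('\n\n')
  buf = events.pop() ?? ''
  for (const event of events) {
    if (!event.startsWith('data: ')) continue
    const data = event.slice('data: '.length)
    if (data === '[DONE]') continue
    console.log(JSON.parse(data).choices[0]?.delta?.content ?? '')
  }
}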

This code was created with reference to the Vercel v0 API design: https://vercel.com/docs/v0/api

npm create cloudflare@latest
npm i hono openai
npx wrangler secret put OPENAI_API_KEY
npm run dev
npm run deploy
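
Note: the /workersai route calls c.env.AI.run(), so the worker also needs a Workers AI binding named AI in its Wrangler configuration. A minimal wrangler.toml sketch (assuming this project keeps the default binding name):

[ai]
binding = "AI"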

Cline

(screenshot)

Copilot Chat

(screenshot)

Cursor

(screenshot)

Testing

(screenshot)

index.ts

import openaiApp from './openai'
import workersaiApp from './workersai'
import mockApp from './mock'

export enum AIProvider {
  MOCK = 'mock',
  OPENAI = 'openai',
  WORKERSAI = 'workersai'
}

export const app = {
  fetch(request: Request, env: any, ctx: ExecutionContext) {
    const url = new URL(request.url)
    const path = url.pathname
    if (path.startsWith('/openai')) {
      return openaiApp.fetch(request, env, ctx)
    } else if (path.startsWith('/workersai')) {
      return workersaiApp.fetch(request, env, ctx)
    } else if (path.startsWith('/mock')) {
      return mockApp.fetch(request, env, ctx)
    } else if (path === '/v1/models') {
      // Cursor API compatibility endpoint
      return new Response(JSON.stringify({
        models: [
          {
            name: "my-custom-model",
            model: "gpt-4o",
          }
        ]
      }), {
        headers: {
          'Content-Type': 'application/json',
          'Access-Control-Allow-Origin': '*',
          'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
          'Access-Control-Allow-Headers': 'Content-Type'
        }
      })
    } else if (path === '/api/tags') {
      // Ollama API compatibility endpoint
      return new Response(JSON.stringify({
        models: [
          {
            name: "workersai",
            model: "@cf/meta/llama-4-scout-17b-16e-instruct",
          }
        ]
      }), {
        headers: {
          'Content-Type': 'application/json'
        }
      })
    } else if (path === '/api/show') {
      // Ollama API compatibility endpoint
      return new Response(JSON.stringify({
        "model_info": {
          "general.basename": "llama-4-scout-17b-16e-instruct"
        },
        "capabilities": [
          "completion",
          "tools"
        ]
      }), {
        headers: {
          'Content-Type': 'application/json'
        }
      })
    }
    // Unmatched paths previously fell through and returned undefined; return 404 instead
    return new Response('Not Found', { status: 404 })
  }
}

export default app
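
Because app.fetch is a plain function, the router can be exercised directly, for example from a quick script or test (the empty env and ctx stubs are illustrative):

const res = await app.fetch(new Request('http://localhost/v1/models'), {}, {} as ExecutionContext)
console.log(await res.json()) // { models: [ { name: 'my-custom-model', model: 'gpt-4o' } ] }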
mock.ts

import { Hono } from 'hono'
import { logger } from 'hono/logger'
import { streamSSE } from 'hono/streaming'

const app = new Hono<{ Bindings: { OPENAI_API_KEY: string } }>()
  .basePath('/mock')

app.use('*', logger())

app.post('/v1/chat/completions', async (c) => {
  const body = await c.req.json()
  // console.log('POST /v1/chat/completions body:', JSON.stringify(body, null, 2))
  if (body.stream) {
    return streamSSE(c, async (stream) => {
      await stream.writeSSE({
        data: JSON.stringify({
          id: "chatcmpl-BfQQQVwI3X6lVvqlWU72RZNN4leUK",
          object: "chat.completion.chunk",
          created: Math.floor(Date.now() / 1000),
          model: body.model,
          service_tier: "default",
          system_fingerprint: "fp_9bddfca6e2",
          choices: [{
            index: 0,
            delta: {
              role: "assistant",
              content: "Hello, I'm Mock Streaming!",
              refusal: null
            },
            logprobs: null,
            finish_reason: "stop"
          }]
        })
      })
      await stream.writeSSE({
        data: '[DONE]'
      })
    })
  }
  return c.json({
    id: "chatcmpl-xxxxxx",
    object: "chat.completion",
    created: Math.floor(Date.now() / 1000),
    model: body.model,
    choices: [
      {
        index: 0,
        message: {
          role: "assistant",
          content: "Hello, I'm Mock Non-Streaming!"
        },
        finish_reason: "stop"
      }
    ],
    usage: {
      prompt_tokens: 20,
      completion_tokens: 25,
      total_tokens: 45
    }
  })
})

// OpenAI API compatibility endpoints
app.get('/v1/models', (c) => {
  return c.json({
    models: [
      {
        name: "my-custom-model",
        model: "gpt-4o",
      }
    ]
  })
})

// Ollama API compatibility endpoints
app.get('/api/tags', (c) => {
  return c.json({
    models: [
      {
        name: "my-custom-model",
        model: "gpt-4o",
      }
    ]
  })
})

app.post('/api/show', async (c) => {
  return c.json({
    "model_info": {
      "general.basename": "My Custom Model"
    },
    "capabilities": [
      "completion",
      "tools"
    ]
  })
})

export default app
openai.ts

import { Hono } from 'hono'
import { OpenAI } from 'openai'
import { logger } from 'hono/logger'
import { streamSSE } from 'hono/streaming'
import { ChatCompletionChunk } from 'openai/resources/chat/completions'

const app = new Hono<{ Bindings: { OPENAI_API_KEY: string } }>()
  .basePath('/openai')

app.use('*', logger())

app.post('/v1/chat/completions', async (c) => {
  const body = await c.req.json()
  // console.log('POST /v1/chat/completions body:', JSON.stringify(body, null, 2))
  const openai = new OpenAI({ apiKey: c.env.OPENAI_API_KEY })
  const response = await openai.chat.completions.create({
    messages: body.messages,
    model: body.model,
    stream: body.stream ?? false,
    // Only forward tools when present; an empty tools array is rejected by the API
    ...(Array.isArray(body.tools) && body.tools.length ? { tools: body.tools } : {}),
  })
  if (body.stream) {
    return streamSSE(c, async (stream) => {
      const streamMessageProcessor = createStreamMessageProcessor()
      const streamResponse = response as unknown as AsyncIterable<ChatCompletionChunk>
      for await (const message of streamResponse) {
        const processedMessage = streamMessageProcessor.processMessage(message)
        if (processedMessage) {
          await stream.writeSSE({
            data: JSON.stringify(processedMessage)
          })
        }
      }
      streamMessageProcessor.flush()
      await stream.writeSSE({
        data: '[DONE]'
      })
    })
  }
  return c.json(response, 200, {
    'Content-Type': 'application/json'
  })
})

// TIPS: Stream message processor for handling tool calls and message transformations
function createStreamMessageProcessor() {
  const buffers: Record<string, string> = {}
  const names: Record<string, string> = {}
  const contentBuffers: Record<string, string> = {}
  return {
    processMessage(message: ChatCompletionChunk) {
      let suppress = false
      message.choices?.forEach((choice: ChatCompletionChunk.Choice) => {
        // Accumulate tool-call fragments so flush() can log complete arguments
        choice.delta?.tool_calls?.forEach((toolCall) => {
          const key = String(toolCall.index ?? 0)
          if (toolCall.function?.name) names[key] = toolCall.function.name
          if (toolCall.function?.arguments) buffers[key] = (buffers[key] ?? '') + toolCall.function.arguments
        })
        if (choice.delta?.content) {
          const content = choice.delta.content
          const previousContent = contentBuffers[choice.index] ?? ''
          contentBuffers[choice.index] = previousContent + content
          const currentContent = contentBuffers[choice.index]
          // Hold back output while a <write_to_file> block is still open
          // (a bare return inside forEach would not skip the chunk, so use a flag)
          if (currentContent.includes('<write_to_file>') && !currentContent.includes('</write_to_file>')) {
            suppress = true
            return
          }
          if (currentContent.includes('</write_to_file>') || currentContent.includes('</replace_in_file>')) {
            // Example override: rewrite react-router-dom imports to react-router
            const replacedContent = contentBuffers[choice.index]
              .replace(/<content>[\s\S]*?from 'react-router-dom'[\s\S]*?<\/content>/g, "<content>from 'react-router'</content>")
              .replace(/<diff>[\s\S]*?from 'react-router-dom'[\s\S]*?<\/diff>/g, "<diff>from 'react-router'</diff>")
            message.choices[choice.index].delta.content = replacedContent
            contentBuffers[choice.index] = ''
          }
        }
      })
      // Returning null tells the caller to skip emitting this chunk
      return suppress ? null : message
    },
    flush() {
      for (const [id, args] of Object.entries(buffers)) {
        const name = names[id] || ''
        console.log(`tool_call: ${name} arguments:`, args)
      }
    }
  }
}

export default app
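
To see the content rewrite in isolation, feed the processor a synthetic chunk (this assumes createStreamMessageProcessor is exported for testing; the chunk values are illustrative):

const proc = createStreamMessageProcessor()
const chunk = {
  id: 'chatcmpl-test',
  object: 'chat.completion.chunk',
  created: 0,
  model: 'gpt-4o',
  choices: [{
    index: 0,
    delta: { content: "<replace_in_file><diff>import { Link } from 'react-router-dom'</diff></replace_in_file>" },
    finish_reason: null
  }]
} as ChatCompletionChunk
console.log(proc.processMessage(chunk)?.choices[0].delta.content)
// → "<replace_in_file><diff>from 'react-router'</diff></replace_in_file>"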
workersai.ts

import { Hono } from 'hono'
import { logger } from 'hono/logger'

/* -------------------------------------------------
 * Utility / Type definitions
 * ------------------------------------------------- */
type ModelName = string

interface WorkersToolCall {
  id?: string
  type?: string
  function?: { name?: string; arguments?: string }
}

interface WorkersAIChunk {
  response?: string
  p?: string // partial
  tool_calls?: WorkersToolCall[]
}

interface WorkersAIUsage {
  prompt_tokens?: number
  completion_tokens?: number
  total_tokens?: number
}

interface WorkersAIResponse extends WorkersAIChunk {
  usage?: WorkersAIUsage
}

const encoder = new TextEncoder()
const decoder = new TextDecoder()
const CRLF2 = '\n\n'
const DATA_PREFIX = 'data: '
const randomId = (prefix = 'chatcmpl') =>
  `${prefix}-${crypto.randomUUID()}`

/* -------------------------------------------------
 * Common: OpenAI compatible object generation
 * ------------------------------------------------- */
const mkOpenAIChunk = (
  delta: Record<string, unknown>,
  model: ModelName,
  finish_reason: string | null = null
) => ({
  id: randomId(),
  object: 'chat.completion.chunk',
  created: Math.floor(Date.now() / 1000),
  model,
  choices: [
    { index: 0, delta, finish_reason }
  ]
})

/* -------------------------------------------------
 * Common: SSE send helper
 * ------------------------------------------------- */
const enqueueJSON = (
  ctrl: ReadableStreamDefaultController,
  obj: unknown
) => ctrl.enqueue(encoder.encode(`${DATA_PREFIX}${JSON.stringify(obj)}${CRLF2}`))

const enqueueDone = (ctrl: ReadableStreamDefaultController) =>
  ctrl.enqueue(encoder.encode(`${DATA_PREFIX}[DONE]${CRLF2}`))

/* -------------------------------------------------
 * Streaming response conversion logic
 * ------------------------------------------------- */
const streamWorkersAI2OpenAI = (
  reader: ReadableStreamDefaultReader,
  model: ModelName
) =>
  new ReadableStream({
    async start(controller) {
      let buf = ''
      let currentToolCall: WorkersToolCall | null = null
      try {
        while (true) {
          const { done, value } = await reader.read()
          if (done) break
          // stream: true keeps multi-byte characters split across reads intact
          buf += decoder.decode(value as Uint8Array, { stream: true })
          const messages = buf.split(CRLF2)
          buf = messages.pop() || ''
          for (const raw of messages) {
            if (!raw.startsWith(DATA_PREFIX)) continue
            const dataStr = raw.slice(DATA_PREFIX.length)
            if (dataStr === '[DONE]') continue
            let chunk: WorkersAIChunk
            try {
              chunk = JSON.parse(dataStr)
            } catch {
              console.log('JSON parse error:', dataStr)
              continue
            }
            /* ----- tool_calls ----- */
            if (chunk.tool_calls?.length) {
              const tool = chunk.tool_calls[0]
              // accumulate
              currentToolCall ||= {
                id: tool.id ?? randomId('chatcmpl-tool'),
                type: tool.type ?? 'function',
                function: { name: '', arguments: '' }
              }
              if (tool.function?.arguments)
                currentToolCall.function!.arguments += tool.function.arguments
              if (tool.function?.name)
                currentToolCall.function!.name = tool.function.name
              enqueueJSON(
                controller,
                mkOpenAIChunk(
                  {
                    tool_calls: [
                      {
                        index: 0,
                        ...currentToolCall
                      }
                    ]
                  },
                  model
                )
              )
              continue
            }
            /* ----- content ----- */
            const content = chunk.response ?? chunk.p ?? ''
            if (content) {
              enqueueJSON(
                controller,
                mkOpenAIChunk({ content }, model)
              )
            }
          }
        }
        /* ---- final chunk ---- */
        enqueueJSON(
          controller,
          mkOpenAIChunk({}, model, currentToolCall ? 'tool_calls' : 'stop')
        )
      } finally {
        reader.releaseLock()
      }
      enqueueDone(controller)
      controller.close()
    }
  })

/* -------------------------------------------------
 * Non-streaming response conversion
 * ------------------------------------------------- */
const mapWorkersAI2OpenAI = (r: WorkersAIResponse, model: ModelName) => {
  const usage = {
    prompt_tokens: r.usage?.prompt_tokens ?? 0,
    completion_tokens: r.usage?.completion_tokens ?? 0,
    total_tokens: r.usage?.total_tokens ?? 0
  }
  const base = {
    id: randomId(),
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000),
    model,
    usage,
    service_tier: 'default',
    system_fingerprint: `fp_${crypto.randomUUID().slice(0, 10)}`
  }
  if (r.tool_calls?.length) {
    return {
      ...base,
      choices: [
        {
          index: 0,
          message: {
            role: 'assistant',
            content: null,
            tool_calls: r.tool_calls,
            refusal: null,
            annotations: []
          },
          finish_reason: 'tool_calls',
          logprobs: null
        }
      ]
    }
  }
  const content = r.response ?? r.p ?? ''
  return {
    ...base,
    choices: [
      {
        index: 0,
        message: { role: 'assistant', content, refusal: null, annotations: [] },
        finish_reason: 'stop',
        logprobs: null
      }
    ]
  }
}

/* =================================================
 * Hono app body
 * ================================================= */
const app = new Hono<{ Bindings: { AI: any } }>()
  .basePath('/workersai')

app.use('*', logger())

/* -------- OpenAI compatible chat endpoint -------- */
app.post('/v1/chat/completions', async c => {
  const body = await c.req.json()
  // console.log('POST /v1/chat/completions body:', body)
  const aiParams: any = {
    messages: body.messages,
    stream: body.stream ?? true
  }
  if (Array.isArray(body.tools) && body.tools.length) aiParams.tools = body.tools
  const workersRes = await c.env.AI.run(body.model, aiParams)
  /* ----- stream ----- */
  if (workersRes instanceof ReadableStream) {
    const reader = workersRes.getReader()
    // Set the SSE content type directly on the returned Response
    return new Response(streamWorkersAI2OpenAI(reader, body.model), {
      headers: { 'Content-Type': 'text/event-stream' }
    })
  }
  /* ----- non-stream ----- */
  return c.json(
    mapWorkersAI2OpenAI(workersRes, body.model),
    200,
    { 'Content-Type': 'application/json' }
  )
})

// Cursor compatibility endpoints
app.get('/v1/models', c =>
  c.json({ models: [{ name: 'my-custom-model', model: 'gpt-4o' }] }))

// Ollama API compatibility endpoints
app.get('/api/tags', c =>
  c.json({ models: [{ name: 'llama-4-scout-17b-16e-instruct', model: '@cf/meta/llama-4-scout-17b-16e-instruct' }] }))

app.post('/api/show', c =>
  c.json({
    model_info: { 'general.basename': 'llama-4-scout-17b-16e-instruct' },
    capabilities: ['completion', 'tools']
  }))

export default app
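
For reference, a Workers AI SSE event such as data: {"response":"Hi"} comes out of streamWorkersAI2OpenAI as an OpenAI-style chunk along these lines (id and created vary per chunk):

data: {"id":"chatcmpl-<uuid>","object":"chat.completion.chunk","created":1749290000,"model":"@cf/meta/llama-4-scout-17b-16e-instruct","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}

followed by a final chunk with an empty delta and a finish_reason of "stop" (or "tool_calls"), then data: [DONE].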