@laiso
Last active June 7, 2025 10:18
Custom AI editor tool override API with OpenAI Chat Completions compatibility

This project implements a custom code-generation tool API for AI editors (VS Code, Cursor). The main purpose is to override the Tool Use functionality of AI editors and inject custom code-generation logic, while maintaining full compatibility with the Chat Completions API and the Ollama API so that existing editor configurations can be used as-is.

The supported environments are the VS Code and Cursor editors, and the API can be deployed on Cloudflare Workers. It must support streaming=true for real-time responses. Detailed tool-call tracking and log output are provided for debugging.
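
For example, a minimal streaming client for the mock route defined below looks like this (a sketch assuming the local dev server at http://localhost:8787; the URL and model name are placeholders):

const res = await fetch('http://localhost:8787/mock/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-4o',
    stream: true,
    messages: [{ role: 'user', content: 'Hello' }]
  })
})

// Consume the SSE stream and print each content delta
const reader = res.body!.getReader()
const decoder = new TextDecoder()
let buf = ''
while (true) {
  const { done, value } = await reader.read()
  if (done) break
  buf += decoder.decode(value, { stream: true })
  const events = buf.split('\n\n')
  buf = events.pop() ?? ''
  for (const event of events) {
    if (!event.startsWith('data: ')) continue
    const data = event.slice('data: '.length)
    if (data === '[DONE]') continue
    console.log(JSON.parse(data).choices[0]?.delta?.content ?? '')
  }
}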

This code was created with reference to the Vercel v0 API design: https://vercel.com/docs/v0/api

npm create cloudflare@latest
npm i hono openai
npx wrangler secret put OPENAI_API_KEY
npm run dev
npm run deploy
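
Note: the /workersai route calls c.env.AI.run(), so the worker also needs a Workers AI binding named AI in its Wrangler configuration. A minimal wrangler.toml sketch (assuming this project keeps the default binding name):

[ai]
binding = "AI"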

Cline

(screenshot)

Copilot Chat

(screenshot)

Cursor

(screenshot)

Testing

(screenshot)

index.ts

import openaiApp from './openai'
import workersaiApp from './workersai'
import mockApp from './mock'

export enum AIProvider {
  MOCK = 'mock',
  OPENAI = 'openai',
  WORKERSAI = 'workersai'
}

export const app = {
  fetch(request: Request, env: any, ctx: ExecutionContext) {
    const url = new URL(request.url)
    const path = url.pathname
    if (path.startsWith('/openai')) {
      return openaiApp.fetch(request, env, ctx)
    } else if (path.startsWith('/workersai')) {
      return workersaiApp.fetch(request, env, ctx)
    } else if (path.startsWith('/mock')) {
      return mockApp.fetch(request, env, ctx)
    } else if (path === '/v1/models') {
      // Cursor API compatibility endpoint
      return new Response(JSON.stringify({
        models: [
          {
            name: "my-custom-model",
            model: "gpt-4o",
          }
        ]
      }), {
        headers: {
          'Content-Type': 'application/json',
          'Access-Control-Allow-Origin': '*',
          'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
          'Access-Control-Allow-Headers': 'Content-Type'
        }
      })
    } else if (path === '/api/tags') {
      // Ollama API compatibility endpoint
      return new Response(JSON.stringify({
        models: [
          {
            name: "workersai",
            model: "@cf/meta/llama-4-scout-17b-16e-instruct",
          }
        ]
      }), {
        headers: {
          'Content-Type': 'application/json'
        }
      })
    } else if (path === '/api/show') {
      // Ollama API compatibility endpoint
      return new Response(JSON.stringify({
        "model_info": {
          "general.basename": "llama-4-scout-17b-16e-instruct"
        },
        "capabilities": [
          "completion",
          "tools"
        ]
      }), {
        headers: {
          'Content-Type': 'application/json'
        }
      })
    }
    // Unmatched paths previously fell through and returned undefined; return 404 instead
    return new Response('Not Found', { status: 404 })
  }
}

export default app
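
Because app.fetch is a plain function, the router can be exercised directly, for example from a quick script or test (the empty env and ctx stubs are illustrative):

const res = await app.fetch(new Request('http://localhost/v1/models'), {}, {} as ExecutionContext)
console.log(await res.json()) // { models: [ { name: 'my-custom-model', model: 'gpt-4o' } ] }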
mock.ts

import { Hono } from 'hono'
import { logger } from 'hono/logger'
import { streamSSE } from 'hono/streaming'

const app = new Hono<{ Bindings: { OPENAI_API_KEY: string } }>()
  .basePath('/mock')

app.use('*', logger())

app.post('/v1/chat/completions', async (c) => {
  const body = await c.req.json()
  // console.log('POST /v1/chat/completions body:', JSON.stringify(body, null, 2))
  if (body.stream) {
    return streamSSE(c, async (stream) => {
      await stream.writeSSE({
        data: JSON.stringify({
          id: "chatcmpl-BfQQQVwI3X6lVvqlWU72RZNN4leUK",
          object: "chat.completion.chunk",
          created: Math.floor(Date.now() / 1000),
          model: body.model,
          service_tier: "default",
          system_fingerprint: "fp_9bddfca6e2",
          choices: [{
            index: 0,
            delta: {
              role: "assistant",
              content: "Hello, I'm Mock Streaming!",
              refusal: null
            },
            logprobs: null,
            finish_reason: "stop"
          }]
        })
      })
      await stream.writeSSE({
        data: '[DONE]'
      })
    })
  }
  return c.json({
    id: "chatcmpl-xxxxxx",
    object: "chat.completion",
    created: Math.floor(Date.now() / 1000),
    model: body.model,
    choices: [
      {
        index: 0,
        message: {
          role: "assistant",
          content: "Hello, I'm Mock Non-Streaming!"
        },
        finish_reason: "stop"
      }
    ],
    usage: {
      prompt_tokens: 20,
      completion_tokens: 25,
      total_tokens: 45
    }
  })
})

// OpenAI API compatibility endpoints
app.get('/v1/models', (c) => {
  return c.json({
    models: [
      {
        name: "my-custom-model",
        model: "gpt-4o",
      }
    ]
  })
})

// Ollama API compatibility endpoints
app.get('/api/tags', (c) => {
  return c.json({
    models: [
      {
        name: "my-custom-model",
        model: "gpt-4o",
      }
    ]
  })
})

app.post('/api/show', async (c) => {
  return c.json({
    "model_info": {
      "general.basename": "My Custom Model"
    },
    "capabilities": [
      "completion",
      "tools"
    ]
  })
})

export default app
openai.ts

import { Hono } from 'hono'
import { OpenAI } from 'openai'
import { logger } from 'hono/logger'
import { streamSSE } from 'hono/streaming'
import { ChatCompletionChunk } from 'openai/resources/chat/completions'

const app = new Hono<{ Bindings: { OPENAI_API_KEY: string } }>()
  .basePath('/openai')

app.use('*', logger())

app.post('/v1/chat/completions', async (c) => {
  const body = await c.req.json()
  // console.log('POST /v1/chat/completions body:', JSON.stringify(body, null, 2))
  const openai = new OpenAI({ apiKey: c.env.OPENAI_API_KEY })
  const response = await openai.chat.completions.create({
    messages: body.messages,
    model: body.model,
    stream: body.stream ?? false,
    // Only forward tools when present; an empty tools array is rejected by the API
    ...(Array.isArray(body.tools) && body.tools.length ? { tools: body.tools } : {}),
  })
  if (body.stream) {
    return streamSSE(c, async (stream) => {
      const streamMessageProcessor = createStreamMessageProcessor()
      const streamResponse = response as unknown as AsyncIterable<ChatCompletionChunk>
      for await (const message of streamResponse) {
        const processedMessage = streamMessageProcessor.processMessage(message)
        if (processedMessage) {
          await stream.writeSSE({
            data: JSON.stringify(processedMessage)
          })
        }
      }
      streamMessageProcessor.flush()
      await stream.writeSSE({
        data: '[DONE]'
      })
    })
  }
  return c.json(response, 200, {
    'Content-Type': 'application/json'
  })
})

// TIPS: Stream message processor for handling tool calls and message transformations
function createStreamMessageProcessor() {
  const buffers: Record<string, string> = {}
  const names: Record<string, string> = {}
  const contentBuffers: Record<string, string> = {}
  return {
    processMessage(message: ChatCompletionChunk) {
      let suppress = false
      message.choices?.forEach((choice: ChatCompletionChunk.Choice) => {
        // Accumulate tool-call fragments so flush() can log complete arguments
        choice.delta?.tool_calls?.forEach((toolCall) => {
          const key = String(toolCall.index ?? 0)
          if (toolCall.function?.name) names[key] = toolCall.function.name
          if (toolCall.function?.arguments) buffers[key] = (buffers[key] ?? '') + toolCall.function.arguments
        })
        if (choice.delta?.content) {
          const content = choice.delta.content
          const previousContent = contentBuffers[choice.index] ?? ''
          contentBuffers[choice.index] = previousContent + content
          const currentContent = contentBuffers[choice.index]
          // Hold back output while a <write_to_file> block is still open
          // (a bare return inside forEach would not skip the chunk, so use a flag)
          if (currentContent.includes('<write_to_file>') && !currentContent.includes('</write_to_file>')) {
            suppress = true
            return
          }
          if (currentContent.includes('</write_to_file>') || currentContent.includes('</replace_in_file>')) {
            // Example override: rewrite react-router-dom imports to react-router
            const replacedContent = contentBuffers[choice.index]
              .replace(/<content>[\s\S]*?from 'react-router-dom'[\s\S]*?<\/content>/g, "<content>from 'react-router'</content>")
              .replace(/<diff>[\s\S]*?from 'react-router-dom'[\s\S]*?<\/diff>/g, "<diff>from 'react-router'</diff>")
            message.choices[choice.index].delta.content = replacedContent
            contentBuffers[choice.index] = ''
          }
        }
      })
      // Returning null tells the caller to skip emitting this chunk
      return suppress ? null : message
    },
    flush() {
      for (const [id, args] of Object.entries(buffers)) {
        const name = names[id] || ''
        console.log(`tool_call: ${name} arguments:`, args)
      }
    }
  }
}

export default app
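
To see the content rewrite in isolation, feed the processor a synthetic chunk (this assumes createStreamMessageProcessor is exported for testing; the chunk values are illustrative):

const proc = createStreamMessageProcessor()
const chunk = {
  id: 'chatcmpl-test',
  object: 'chat.completion.chunk',
  created: 0,
  model: 'gpt-4o',
  choices: [{
    index: 0,
    delta: { content: "<replace_in_file><diff>import { Link } from 'react-router-dom'</diff></replace_in_file>" },
    finish_reason: null
  }]
} as ChatCompletionChunk
console.log(proc.processMessage(chunk)?.choices[0].delta.content)
// → "<replace_in_file><diff>from 'react-router'</diff></replace_in_file>"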
workersai.ts

import { Hono } from 'hono'
import { logger } from 'hono/logger'

/* -------------------------------------------------
 * Utility / Type definitions
 * ------------------------------------------------- */
type ModelName = string

interface WorkersToolCall {
  id?: string
  type?: string
  function?: { name?: string; arguments?: string }
}

interface WorkersAIChunk {
  response?: string
  p?: string // partial
  tool_calls?: WorkersToolCall[]
}

interface WorkersAIUsage {
  prompt_tokens?: number
  completion_tokens?: number
  total_tokens?: number
}

interface WorkersAIResponse extends WorkersAIChunk {
  usage?: WorkersAIUsage
}

const encoder = new TextEncoder()
const decoder = new TextDecoder()
const CRLF2 = '\n\n'
const DATA_PREFIX = 'data: '
const randomId = (prefix = 'chatcmpl') =>
  `${prefix}-${crypto.randomUUID()}`

/* -------------------------------------------------
 * Common: OpenAI compatible object generation
 * ------------------------------------------------- */
const mkOpenAIChunk = (
  delta: Record<string, unknown>,
  model: ModelName,
  finish_reason: string | null = null
) => ({
  id: randomId(),
  object: 'chat.completion.chunk',
  created: Math.floor(Date.now() / 1000),
  model,
  choices: [
    { index: 0, delta, finish_reason }
  ]
})

/* -------------------------------------------------
 * Common: SSE send helper
 * ------------------------------------------------- */
const enqueueJSON = (
  ctrl: ReadableStreamDefaultController,
  obj: unknown
) => ctrl.enqueue(encoder.encode(`${DATA_PREFIX}${JSON.stringify(obj)}${CRLF2}`))

const enqueueDone = (ctrl: ReadableStreamDefaultController) =>
  ctrl.enqueue(encoder.encode(`${DATA_PREFIX}[DONE]${CRLF2}`))

/* -------------------------------------------------
 * Streaming response conversion logic
 * ------------------------------------------------- */
const streamWorkersAI2OpenAI = (
  reader: ReadableStreamDefaultReader,
  model: ModelName
) =>
  new ReadableStream({
    async start(controller) {
      let buf = ''
      let currentToolCall: WorkersToolCall | null = null
      try {
        while (true) {
          const { done, value } = await reader.read()
          if (done) break
          // stream: true keeps multi-byte characters split across reads intact
          buf += decoder.decode(value as Uint8Array, { stream: true })
          const messages = buf.split(CRLF2)
          buf = messages.pop() || ''
          for (const raw of messages) {
            if (!raw.startsWith(DATA_PREFIX)) continue
            const dataStr = raw.slice(DATA_PREFIX.length)
            if (dataStr === '[DONE]') continue
            let chunk: WorkersAIChunk
            try {
              chunk = JSON.parse(dataStr)
            } catch {
              console.log('JSON parse error:', dataStr)
              continue
            }
            /* ----- tool_calls ----- */
            if (chunk.tool_calls?.length) {
              const tool = chunk.tool_calls[0]
              // accumulate
              currentToolCall ||= {
                id: tool.id ?? randomId('chatcmpl-tool'),
                type: tool.type ?? 'function',
                function: { name: '', arguments: '' }
              }
              if (tool.function?.arguments)
                currentToolCall.function!.arguments += tool.function.arguments
              if (tool.function?.name)
                currentToolCall.function!.name = tool.function.name
              enqueueJSON(
                controller,
                mkOpenAIChunk(
                  {
                    tool_calls: [
                      {
                        index: 0,
                        ...currentToolCall
                      }
                    ]
                  },
                  model
                )
              )
              continue
            }
            /* ----- content ----- */
            const content = chunk.response ?? chunk.p ?? ''
            if (content) {
              enqueueJSON(
                controller,
                mkOpenAIChunk({ content }, model)
              )
            }
          }
        }
        /* ---- final chunk ---- */
        enqueueJSON(
          controller,
          mkOpenAIChunk({}, model, currentToolCall ? 'tool_calls' : 'stop')
        )
      } finally {
        reader.releaseLock()
      }
      enqueueDone(controller)
      controller.close()
    }
  })

/* -------------------------------------------------
 * Non-streaming response conversion
 * ------------------------------------------------- */
const mapWorkersAI2OpenAI = (r: WorkersAIResponse, model: ModelName) => {
  const usage = {
    prompt_tokens: r.usage?.prompt_tokens ?? 0,
    completion_tokens: r.usage?.completion_tokens ?? 0,
    total_tokens: r.usage?.total_tokens ?? 0
  }
  const base = {
    id: randomId(),
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000),
    model,
    usage,
    service_tier: 'default',
    system_fingerprint: `fp_${crypto.randomUUID().slice(0, 10)}`
  }
  if (r.tool_calls?.length) {
    return {
      ...base,
      choices: [
        {
          index: 0,
          message: {
            role: 'assistant',
            content: null,
            tool_calls: r.tool_calls,
            refusal: null,
            annotations: []
          },
          finish_reason: 'tool_calls',
          logprobs: null
        }
      ]
    }
  }
  const content = r.response ?? r.p ?? ''
  return {
    ...base,
    choices: [
      {
        index: 0,
        message: { role: 'assistant', content, refusal: null, annotations: [] },
        finish_reason: 'stop',
        logprobs: null
      }
    ]
  }
}

/* =================================================
 * Hono app body
 * ================================================= */
const app = new Hono<{ Bindings: { AI: any } }>()
  .basePath('/workersai')

app.use('*', logger())

/* -------- OpenAI compatible chat endpoint -------- */
app.post('/v1/chat/completions', async c => {
  const body = await c.req.json()
  // console.log('POST /v1/chat/completions body:', body)
  const aiParams: any = {
    messages: body.messages,
    stream: body.stream ?? true
  }
  if (Array.isArray(body.tools) && body.tools.length) aiParams.tools = body.tools
  const workersRes = await c.env.AI.run(body.model, aiParams)
  /* ----- stream ----- */
  if (workersRes instanceof ReadableStream) {
    const reader = workersRes.getReader()
    // Set the SSE content type directly on the returned Response
    return new Response(streamWorkersAI2OpenAI(reader, body.model), {
      headers: { 'Content-Type': 'text/event-stream' }
    })
  }
  /* ----- non-stream ----- */
  return c.json(
    mapWorkersAI2OpenAI(workersRes, body.model),
    200,
    { 'Content-Type': 'application/json' }
  )
})

// Cursor compatibility endpoints
app.get('/v1/models', c =>
  c.json({ models: [{ name: 'my-custom-model', model: 'gpt-4o' }] }))

// Ollama API compatibility endpoints
app.get('/api/tags', c =>
  c.json({ models: [{ name: 'llama-4-scout-17b-16e-instruct', model: '@cf/meta/llama-4-scout-17b-16e-instruct' }] }))

app.post('/api/show', c =>
  c.json({
    model_info: { 'general.basename': 'llama-4-scout-17b-16e-instruct' },
    capabilities: ['completion', 'tools']
  }))

export default app
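
For reference, a Workers AI SSE event such as data: {"response":"Hi"} comes out of streamWorkersAI2OpenAI as an OpenAI-style chunk along these lines (id and created vary per chunk):

data: {"id":"chatcmpl-<uuid>","object":"chat.completion.chunk","created":1749290000,"model":"@cf/meta/llama-4-scout-17b-16e-instruct","choices":[{"index":0,"delta":{"content":"Hi"},"finish_reason":null}]}

followed by a final chunk with an empty delta and a finish_reason of "stop" (or "tool_calls"), then data: [DONE].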