Skip to content

Instantly share code, notes, and snippets.

@elchead
Created May 28, 2025 07:20
Show Gist options
  • Save elchead/822f1c6c34bbef9d171a51ab71aa3d36 to your computer and use it in GitHub Desktop.
Save elchead/822f1c6c34bbef9d171a51ab71aa3d36 to your computer and use it in GitHub Desktop.
// docker run --pull=always -p 8080:8080 ghcr.io/edgelesssys/privatemode/privatemode-proxy:latest --apiKey <KEY>
// bun install @ai-sdk/openai-compatible ai zod
// bun run structured-image-output.js
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { generateObject } from 'ai';
import { z } from 'zod';
import fs from 'fs';
import path from 'path';
// Script settings. A relative imagePath is resolved by fs against the
// directory the script is launched from.
const config = {
  imagePath: './edgeless-rocks.png',
};
// OpenAI-compatible provider pointed at the local proxy on port 8080
// (started by the docker command in the header comment).
const proxyProvider = createOpenAICompatible({
  baseURL: 'http://localhost:8080/v1',
  name: 'lmstudio',
});

// Model handle shared by every request made in this script.
const model = proxyProvider('google/gemma-3-27b-it');
// Zod schemas describing the structured output requested from the model.

// A single detected object: its name, a 0..1 confidence score, and a rough location.
const detectedObjectSchema = z.object({
  name: z.string().describe('Name of the detected object'),
  confidence: z.number().min(0).max(1).describe('Confidence score between 0 and 1'),
  location: z.string().describe('General location of the object in the image'),
});

// Coarse technical characteristics of the image.
const technicalDetailsSchema = z.object({
  estimatedResolution: z.string().describe('Estimated resolution category'),
  imageType: z
    .enum(['photograph', 'illustration', 'diagram', 'screenshot', 'other'])
    .describe('Type of image'),
  quality: z.enum(['low', 'medium', 'high']).describe('Perceived image quality'),
});

// Top-level shape of the analysis result.
const imageAnalysisSchema = z.object({
  description: z.string().describe('A detailed description of what is in the image'),
  objects: z.array(detectedObjectSchema).describe('List of detected objects in the image'),
  colors: z.array(z.string()).describe('Dominant colors in the image'),
  mood: z
    .enum(['cheerful', 'serious', 'mysterious', 'energetic', 'calm', 'dramatic'])
    .describe('Overall mood of the image'),
  technicalDetails: technicalDetailsSchema.describe('Technical details about the image'),
});
/**
 * Reads a file from disk and returns its contents encoded as base64.
 *
 * @param {string} imagePath - Path of the file to read.
 * @returns {string} Base64 encoding of the file's bytes.
 * @throws {Error} When the file cannot be read. The message is prefixed with
 *   "Failed to read image file: " and the underlying fs error is attached as
 *   `cause`, so its code (e.g. ENOENT) and stack are not lost.
 */
function encodeImageToBase64(imagePath) {
  try {
    return fs.readFileSync(imagePath).toString('base64');
  } catch (error) {
    throw new Error(`Failed to read image file: ${error.message}`, { cause: error });
  }
}
// Lowercase file extension -> MIME type placed in the image data URL.
const IMAGE_MIME_TYPES = {
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.gif': 'image/gif',
  '.webp': 'image/webp',
};

/**
 * Picks the MIME type for an image from its file extension.
 * Unknown or missing extensions fall back to 'image/png'.
 */
function detectImageMimeType(imagePath) {
  const extension = path.extname(imagePath).toLowerCase();
  return IMAGE_MIME_TYPES[extension] ?? 'image/png';
}

/**
 * Sends the image at `config.imagePath` to `model` together with a text
 * prompt, asks for output shaped by `imageAnalysisSchema`, writes the
 * resulting object to `image-analysis-output.json` in the current working
 * directory, and returns it.
 *
 * @returns {Promise<object>} The `object` field of the generateObject result.
 * @throws {Error} If the image file is missing or unreadable, or if the
 *   request or the file write fails. The error message is logged before the
 *   original error is rethrown.
 */
async function analyzeImageWithStructuredOutput() {
  try {
    // Fail early with a clear message when the image is missing.
    if (!fs.existsSync(config.imagePath)) {
      throw new Error(`Image file not found at ${config.imagePath}`);
    }
    console.log('Starting image analysis with structured output...');
    // The timer starts before encoding, so the reported duration includes it.
    const startTime = Date.now();
    const base64Image = encodeImageToBase64(config.imagePath);
    // Derive the MIME type from the file extension so a non-PNG imagePath is
    // not mislabeled as PNG in the data URL.
    const mimeType = detectImageMimeType(config.imagePath);
    // Generate structured output using Vercel AI SDK
    const result = await generateObject({
      model,
      schema: imageAnalysisSchema,
      messages: [
        {
          role: 'user',
          content: [
            {
              type: 'text',
              text: 'Analyze this image and provide detailed structured information about it.',
            },
            {
              type: 'image',
              image: `data:${mimeType};base64,${base64Image}`,
            },
          ],
        },
      ],
    });
    const endTime = Date.now();
    const responseTime = (endTime - startTime) / 1000;
    console.log(`\nRequest-to-response time: ${responseTime.toFixed(2)} seconds`);
    // Persist the structured output as pretty-printed JSON.
    const outputPath = path.join(process.cwd(), 'image-analysis-output.json');
    fs.writeFileSync(outputPath, JSON.stringify(result.object, null, 2));
    console.log(`\nStructured output saved to: ${outputPath}`);
    return result.object;
  } catch (error) {
    console.error('Error during structured output generation:', error.message);
    throw error;
  }
}
/**
 * Script driver: runs the image analysis once. On any failure it prints the
 * error message and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    await analyzeImageWithStructuredOutput();
  } catch (failure) {
    console.error('Script execution failed:', failure.message);
    process.exit(1);
  }
}
// Start the analysis only when import.meta.main is truthy. The header comment
// runs this file with Bun, which sets that flag on the entry module.
if (import.meta.main) {
  // main() catches its own errors, so the returned promise is deliberately not awaited.
  void main();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment