Skip to content

Instantly share code, notes, and snippets.

@dkohlsdorf
Created July 6, 2025 05:35
Show Gist options
  • Save dkohlsdorf/6c171c0891fdbe045669864cdf8aaf8e to your computer and use it in GitHub Desktop.
Save dkohlsdorf/6c171c0891fdbe045669864cdf8aaf8e to your computer and use it in GitHub Desktop.
I asked Claude to write a Claude Code-style infrastructure for Llama
#!/usr/bin/env python3
"""
Llama3-based Agentic Coding System
Implements the Claude Code architecture with Llama3 as the LLM
"""
import re
import json
import subprocess
import os
from typing import Dict, Any, List, Optional
from dataclasses import dataclass
import asyncio
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
@dataclass
class Action:
    """One tool invocation requested by the model.

    Instances are plain value objects: they carry the name of the
    operation and the keyword arguments to call it with.
    """

    # Name of the operation to run (e.g. "file_read").
    type: str
    # Keyword arguments forwarded to the operation's handler.
    params: Dict[str, Any]
@dataclass
class ActionResult:
    """Outcome of executing a single action.

    ``error`` is optional and defaults to ``None``; ``success`` and
    ``data`` must always be supplied.
    """

    # True when the operation finished without a failure.
    success: bool
    # Payload produced by the operation; its shape depends on the operation.
    data: Any
    # Human-readable failure description, or None when not provided.
    error: Optional[str] = None
class CodingAPI:
    """The actual coding operations that can be performed.

    Every operation reports its outcome as an ``ActionResult`` instead of
    raising, so a failed tool call can be fed back to the model as text.
    File operations are confined to ``workspace_dir``; shell commands run
    with the workspace as their working directory but are NOT sandboxed.
    """

    def __init__(self, workspace_dir: str = "./workspace"):
        """Remember the workspace root and create it if it is missing."""
        self.workspace_dir = workspace_dir
        os.makedirs(workspace_dir, exist_ok=True)

    def _resolve(self, path: str) -> str:
        """Return the absolute location of *path* inside the workspace.

        Raises:
            ValueError: If *path*, once ``..`` segments and symlinks are
                resolved, points outside the workspace directory.
        """
        root = os.path.realpath(self.workspace_dir)
        # os.path.join discards ``root`` when ``path`` is absolute, so the
        # containment check below is what actually keeps us in the workspace.
        target = os.path.realpath(os.path.join(root, path))
        if os.path.commonpath([root, target]) != root:
            raise ValueError(f"Path escapes workspace: {path}")
        return target

    def file_read(self, path: str) -> ActionResult:
        """Read a text file (UTF-8) located at *path* inside the workspace.

        Returns:
            ``ActionResult`` whose ``data`` is the file content on success;
            on any failure ``success`` is False and ``error`` holds the
            exception message.
        """
        # Tool boundary: any failure is converted into an error result.
        try:
            with open(self._resolve(path), 'r', encoding='utf-8') as f:
                content = f.read()
            return ActionResult(success=True, data=content)
        except Exception as e:
            return ActionResult(success=False, data=None, error=str(e))

    def file_write(self, path: str, content: str) -> ActionResult:
        """Write *content* (UTF-8) to *path* inside the workspace.

        Missing parent directories are created. An existing file is
        overwritten.

        Returns:
            ``ActionResult`` with a confirmation message in ``data`` on
            success, or the exception message in ``error`` on failure.
        """
        try:
            full_path = self._resolve(path)
            # full_path is absolute, so dirname() is never the empty string.
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            with open(full_path, 'w', encoding='utf-8') as f:
                f.write(content)
            return ActionResult(success=True, data=f"File written: {path}")
        except Exception as e:
            return ActionResult(success=False, data=None, error=str(e))

    def run_command(self, command: str) -> ActionResult:
        """Run *command* through the shell with the workspace as cwd.

        The command is given 30 seconds before it is aborted.

        Returns:
            ``ActionResult`` whose ``data`` is a dict with ``stdout``,
            ``stderr`` and ``code``. ``success`` mirrors a zero exit code;
            for a non-zero exit ``error`` carries stderr (or the exit code
            when stderr is empty). If the command could not be run at all
            (including a timeout) ``data`` is None and ``error`` holds the
            exception message.
        """
        try:
            # SECURITY: ``command`` comes from model output and is executed
            # by the shell verbatim. That is the purpose of this tool, but it
            # means the agent must only be run in a disposable environment.
            result = subprocess.run(
                command,
                shell=True,
                capture_output=True,
                text=True,
                cwd=self.workspace_dir,
                timeout=30,
            )
        except Exception as e:
            return ActionResult(success=False, data=None, error=str(e))

        succeeded = result.returncode == 0
        error = None
        if not succeeded:
            # Give callers that only look at ``error`` something actionable.
            error = result.stderr.strip() or f"Command exited with code {result.returncode}"
        return ActionResult(
            success=succeeded,
            data={"stdout": result.stdout, "stderr": result.stderr, "code": result.returncode},
            error=error,
        )

    def list_files(self, path: str = ".") -> ActionResult:
        """List the entries of directory *path* inside the workspace.

        Returns:
            ``ActionResult`` whose ``data`` is the list of entry names as
            returned by ``os.listdir``, or the exception message in
            ``error`` on failure.
        """
        try:
            files = os.listdir(self._resolve(path))
            return ActionResult(success=True, data=files)
        except Exception as e:
            return ActionResult(success=False, data=None, error=str(e))
class ActionParser:
    """Parses LLM output for action commands.

    An action is written as::

        ACTION: <name>
        PARAMS: {<json object>}

    The header is located with a regular expression; the JSON object is then
    decoded with ``json.JSONDecoder.raw_decode`` starting at its opening
    brace. A regex cannot find the matching closing brace reliably: payloads
    that contain ``}`` (nested objects, or source code passed as file
    content) would be cut short and rejected as malformed.
    """

    def __init__(self):
        # Matches "ACTION: type\nPARAMS:" and stops right before the opening
        # brace of the JSON object (checked with a lookahead, not consumed).
        self.action_pattern = re.compile(
            r'ACTION:\s*(\w+)\s*\nPARAMS:\s*(?=\{)',
            re.IGNORECASE
        )
        self._decoder = json.JSONDecoder()

    def parse_actions(self, text: str) -> List[Action]:
        """Return every well-formed action found in *text*, in order.

        Action names are lower-cased. Blocks whose PARAMS value is not valid
        JSON are skipped rather than reported.
        """
        actions = []
        pos = 0
        while True:
            header = self.action_pattern.search(text, pos)
            if header is None:
                break
            try:
                # raw_decode returns the object and the index just past it,
                # so scanning resumes after the payload and never looks for
                # headers inside a JSON string.
                params, pos = self._decoder.raw_decode(text, header.end())
            except json.JSONDecodeError:
                # Skip malformed actions, but keep scanning after the header.
                pos = header.end()
                continue
            actions.append(Action(type=header.group(1).lower(), params=params))
        return actions
class MiddlewareOrchestrator:
    """Routes actions found in LLM output to the matching CodingAPI call."""

    def __init__(self, coding_api: CodingAPI):
        """Bind the API instance and build the action-name dispatch table."""
        self.coding_api = coding_api
        self.parser = ActionParser()
        self.action_handlers = {
            'file_read': self.coding_api.file_read,
            'file_write': self.coding_api.file_write,
            'run_command': self.coding_api.run_command,
            'list_files': self.coding_api.list_files,
        }

    def execute_action(self, action: Action) -> ActionResult:
        """Run a single action and return its result.

        Unknown action types and parameter sets that do not fit the
        handler's signature are reported as failed ``ActionResult`` objects
        rather than raised.
        """
        handler = self.action_handlers.get(action.type)
        if handler is None:
            return ActionResult(
                success=False,
                data=None,
                error=f"Unknown action type: {action.type}"
            )
        # Call the handler with unpacked parameters; a missing or unexpected
        # keyword surfaces as TypeError.
        try:
            return handler(**action.params)
        except TypeError as e:
            return ActionResult(
                success=False,
                data=None,
                error=f"Invalid parameters for {action.type}: {str(e)}"
            )

    def process_llm_output(self, llm_output: str) -> str:
        """Process LLM output, execute actions, and return augmented text.

        The original text is returned unchanged when it contains no actions.
        Otherwise one ``[RESULT]`` or ``[ERROR]`` block per action is
        appended after the text, in the order the actions appear.
        """
        actions = self.parser.parse_actions(llm_output)
        if not actions:
            return llm_output

        pieces = [llm_output]
        for action in actions:
            result = self.execute_action(action)
            # Format result for injection back into context
            if result.success:
                pieces.append(f"\n[RESULT] {action.type}: {result.data}\n")
            else:
                # A failure may carry its details in ``data`` with no error
                # message (e.g. a command that exited non-zero); show those
                # details instead of the literal "None".
                detail = result.error if result.error is not None else result.data
                pieces.append(f"\n[ERROR] {action.type}: {detail}\n")
        return "".join(pieces)
class Llama3Agent:
    """Main agent class that wraps Llama3 with agentic capabilities.

    The agent keeps a plain-text conversation history, prompts the model
    with a system prompt describing the action format, and alternates
    between generation and action execution.
    """

    def __init__(self, model_path: str = "meta-llama/Llama-3.2-3B-Instruct"):
        """Load tokenizer and model, and set up the tool middleware."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        # NOTE(review): float16 is requested even when no GPU is available;
        # confirm half precision is acceptable for CPU inference.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            device_map="auto" if torch.cuda.is_available() else None
        )
        self.coding_api = CodingAPI()
        self.middleware = MiddlewareOrchestrator(self.coding_api)
        self.conversation_history = []
        # System prompt that teaches the model the action format
        self.system_prompt = """You are an AI coding assistant with access to development tools.
Available actions:
- file_read: Read file contents
- file_write: Write content to file
- run_command: Execute shell commands
- list_files: List directory contents
To perform actions, use this exact format:
ACTION: action_name
PARAMS: {"param1": "value1", "param2": "value2"}
Example:
ACTION: file_read
PARAMS: {"path": "main.py"}
Always explain your reasoning before and after actions. Break complex tasks into steps."""

    def generate_response(self, user_input: str, max_iterations: int = 5) -> str:
        """Generate response with potential multiple action-execution cycles.

        Args:
            user_input: The user's message for this turn.
            max_iterations: Upper bound on generate/execute rounds.

        Returns:
            The accumulated assistant text, including the injected
            ``[RESULT]``/``[ERROR]`` blocks of executed actions. The turn is
            also appended to ``conversation_history``.
        """
        # Build conversation context
        context = self.system_prompt + "\n\n"
        for msg in self.conversation_history[-10:]:  # Keep last 10 messages
            context += f"{msg}\n"
        context += f"Human: {user_input}\nAssistant: "

        stop_words = ("done", "complete", "finished")
        full_response = ""
        for _ in range(max_iterations):
            # Generate LLM response. The ids must live on the model's device,
            # otherwise generation fails or warns when the model is on GPU.
            inputs = self.tokenizer.encode(
                context + full_response, return_tensors="pt"
            ).to(self.model.device)
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    # Single unpadded sequence: every position is real input.
                    # Passing the mask explicitly matters because the pad id
                    # equals the EOS id and cannot be used to infer it.
                    attention_mask=torch.ones_like(inputs),
                    max_new_tokens=512,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            # Decode new tokens only
            new_tokens = outputs[0][inputs.shape[1]:]
            new_text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)

            # Check if we should stop (no more actions or natural stopping point)
            if not self.middleware.parser.parse_actions(new_text):
                full_response += new_text
                break

            # Process actions and continue generation
            processed_text = self.middleware.process_llm_output(new_text)
            full_response += processed_text

            # Check for natural stopping. Only the model's own words count:
            # tool output (file contents, command output) may contain these
            # words by coincidence and must not end the loop.
            lowered = new_text.lower()
            if any(stop in lowered for stop in stop_words):
                break

        # Update conversation history
        self.conversation_history.append(f"Human: {user_input}")
        self.conversation_history.append(f"Assistant: {full_response}")
        return full_response
def main():
    """Demo of the Llama3 agentic coding system.

    Runs an interactive prompt loop until the user types 'quit' or closes
    the input stream (Ctrl-D / Ctrl-C).
    """
    print("Initializing Llama3 Agent...")
    agent = Llama3Agent()
    print("Agent ready! Type 'quit' to exit.")
    while True:
        try:
            user_input = input("\nUser: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or an interrupt at the prompt is a normal exit,
            # not a crash.
            print()
            break
        command = user_input.strip()
        if not command:
            # Nothing to send to the model; ask again.
            continue
        if command.lower() == 'quit':
            break
        # Flush so the prefix is visible while the model is still generating.
        print("\nAgent: ", end="", flush=True)
        response = agent.generate_response(user_input)
        print(response)
# Start the interactive demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment