Skip to content

Instantly share code, notes, and snippets.

@JupyterJones
Created May 14, 2026 06:53
Show Gist options
  • Select an option

  • Save JupyterJones/995d99b56c8dbd275b78138f67e39ef6 to your computer and use it in GitHub Desktop.

Select an option

Save JupyterJones/995d99b56c8dbd275b78138f67e39ef6 to your computer and use it in GitHub Desktop.
Comfy_MasterDirector is used to create guided ComfyUI mp4s via generate_cliche_free_story, with various random transitions
#!/usr/bin/env python3
import os
import time
import threading
import requests
import subprocess
import datetime
import traceback
import re
import random
from flask import Flask, request, jsonify, render_template_string, send_from_directory
# ============================================================
# CONFIGURATION
# ============================================================
# Service endpoints: ComfyUI image server, Ollama LLM, Kokoro TTS.
COMFY_URL = "http://192.168.1.41:5001"
OLLAMA_URL = "http://192.168.1.41:11434/api/generate"
KOKORO_URL = "http://localhost:8880/v1/audio/speech"
OLLAMA_MODEL = "llama3.2:3b"
# Support for high-latency local inference
AI_TIMEOUT = 2000  # request timeout in seconds, shared by Ollama and Kokoro calls
# All session folders and the debug log live under this directory.
BASE_OUT_DIR = os.path.abspath("./Directormay14")
os.makedirs(BASE_OUT_DIR, exist_ok=True)
# Kokoro voice ids offered in the UI dropdown.
VOICES=['bf_alice', 'af_sky', 'am_adam', 'bm_george', 'am_santa']
# Safe FFmpeg transitions
TRANSITIONS = ['fade', 'wipeleft', 'wiperight', 'slideleft', 'slideright', 'circlecrop', 'pixelize', 'dissolve']
# Mutable global shared between the Flask handlers and the worker thread;
# polled by the browser via /state every 2 seconds.
state = {
    "running": False,
    "status": "IDLE",
    "images": [],
    "story": "",
    "video_path": "",
    "log": "System Ready"
}
# ============================================================
# OLLAMA LOGIC: STORY & DIRECTION
# ============================================================
def ask_ollama(prompt):
    """Send *prompt* to the local Ollama server and return its text reply.

    Returns an empty string when the request fails for any reason; errors
    are reported through logit() rather than raised.
    """
    payload = {"model": OLLAMA_MODEL, "prompt": prompt, "stream": False}
    try:
        logit(f"Calling Ollama (Timeout: {AI_TIMEOUT}s)...")
        reply = requests.post(OLLAMA_URL, json=payload, timeout=AI_TIMEOUT)
        return reply.json().get("response", "").strip()
    except Exception as exc:
        logit(f"Ollama Error: {exc}")
        return ""
def generate_cliche_free_story(user_prompt):
    """Ask the LLM for a 12-paragraph found-footage horror story on *user_prompt*.

    The prompt bans common horror clichés and internal emotions so the text
    reads as camera observation only. Returns the raw model output.
    """
    directive = f"""
STRICT TASK: Write a cinematic found-footage horror story in exactly 12 paragraphs.
Each paragraph MUST be exactly 3 sentences.
Topic: {user_prompt}
STYLE: Clinical, observational, technical found-footage. Focus on POV.
FORBIDDEN WORDS/CLICHÉS:
- No "chill down my spine", "shivers", or "blood ran cold".
- No "pounding in anticipation", "heart raced", or "pulses quickened".
- No "thick with anticipation" or "heavy with dread".
- No "I couldn't believe my eyes" or "Suddenly, I realized".
- Avoid all internal character emotions. Describe only what the camera sees and hears.
"""
    return ask_ollama(directive)
def generate_visual_bible(story_text):
    """Distill *story_text* into a short visual style guide for image prompts.

    Only the first 1000 characters of the story are sent to keep the request
    small. Returns the model's 2-sentence technical guide.
    """
    directive = f"""
Create a 2-sentence visual technical guide for a AI camera.
Focus on: POV perspective, Lens quality (VHS/Grainy), Lighting (flickering, dim), and recurring textures (rusted metal, slime, concrete).
STRICT: No abstract emotional words.
STORY: {story_text[:1000]}
"""
    return ask_ollama(directive)
def generate_segment_prompts(bible, paragraph):
    """Turn *paragraph* into two Stable Diffusion image prompts via the LLM.

    Returns a (shot_a, shot_b) tuple. Any shot the model fails to label
    falls back to the raw paragraph text, so callers always get two usable
    prompt strings.
    """
    prompt = f"""
STRICT TASK: Convert the following paragraph into TWO specific visual image prompts for Stable Diffusion.
STYLE GUIDE: {bible}
PARAGRAPH: {paragraph}
RULES:
1. POV perspective only. Describe physical objects and lighting.
2. Format your response exactly like this:
SHOT_A: [Visual description]
SHOT_B: [Visual description]
"""
    response = ask_ollama(prompt)
    # Bug fix: the original wrapped this parsing in a bare `except: pass`,
    # which silently swallowed every error (including KeyboardInterrupt).
    # re.search cannot raise on these str inputs, so no handler is needed.
    a_match = re.search(r"SHOT_A:(.*?)(?=SHOT_B:|$)", response, re.DOTALL | re.IGNORECASE)
    b_match = re.search(r"SHOT_B:(.*)", response, re.DOTALL | re.IGNORECASE)
    shot_a = a_match.group(1).strip() if a_match else paragraph
    shot_b = b_match.group(1).strip() if b_match else paragraph
    return shot_a, shot_b
# ============================================================
# UTILS & LOGGING
# ============================================================
def logit(*args):
    """Log a timestamped message to stdout, the UI state, and the debug file.

    The UI receives the bare message; stdout and the log file get a
    ``[HH:MM:SS]`` prefix. File-write failures are ignored so logging can
    never crash the pipeline.
    """
    msg = " ".join(map(str, args))
    ts = datetime.datetime.now().strftime("%H:%M:%S")
    line = f"[{ts}] {msg}"
    print(line, flush=True)
    state["log"] = msg
    try:
        with open(os.path.join(BASE_OUT_DIR, "debug_log.txt"), "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError:
        # Bug fix: was a bare `except:` that also trapped KeyboardInterrupt
        # and SystemExit; only disk/OS errors should be swallowed here.
        pass
def get_audio_duration(path):
    """Return the duration of the media file at *path* in seconds via ffprobe.

    Falls back to 5.0 when ffprobe is missing, the file is unreadable, or
    the probe output is not a number, so the pipeline always gets a usable
    duration.
    """
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    try:
        r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        return float(r.stdout.strip())
    except (OSError, ValueError, subprocess.SubprocessError):
        # Bug fix: was a bare `except:`; narrow to the failures this call
        # can actually produce (missing binary, bad file, non-numeric output).
        return 5.0
def fetch_comfy_assets():
    """Query ComfyUI for installed checkpoints and LoRAs.

    Returns a (checkpoints, loras) tuple; the lora list always starts with
    "None" so the UI can offer a no-LoRA option. Falls back to safe defaults
    when ComfyUI is unreachable or returns malformed JSON.
    """
    try:
        checkpoints = requests.get(f"{COMFY_URL}/models/checkpoints", timeout=30).json()
        loras = ["None"] + requests.get(f"{COMFY_URL}/models/loras", timeout=30).json()
        return checkpoints, loras
    except Exception:
        # Bug fix: was a bare `except:` which also trapped KeyboardInterrupt;
        # any network/JSON failure simply yields the default asset lists.
        return ["v1-5-pruned-emaonly.safetensors"], ["None"]
# ============================================================
# THE PIPELINE
# ============================================================
def run_segmented_pipeline(user_prompt, model, l1, l2, voice, neg):
    """End-to-end movie build: story -> per-segment audio + 2 images -> xfade mux -> concat.

    Runs on a background thread; all progress is published through the global
    `state` dict that the browser polls. NOTE(review): `l2` is accepted but
    never used in this body — presumably a second-LoRA slot; confirm with caller.
    """
    try:
        state["running"] = True
        state["images"], state["video_path"], state["story"] = [], "", ""
        # Each run gets its own timestamped session folder under BASE_OUT_DIR.
        session_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        out_dir = os.path.join(BASE_OUT_DIR, session_id)
        os.makedirs(out_dir, exist_ok=True)
        # 1. STORY & BIBLE
        state["status"] = "AI WRITING"
        full_text = generate_cliche_free_story(user_prompt)
        if not full_text: raise Exception("Story generation failed.")
        state["story"] = full_text
        bible = generate_visual_bible(full_text)
        logit(f"Visual Bible: {bible}")
        # Keep at most 12 non-trivial lines (>20 chars after strip) as segments.
        segments = [s.strip() for s in full_text.split('\n') if len(s.strip()) > 20][:12]
        segment_videos = []
        # 2. PROCESS SEGMENTS
        for i, seg_text in enumerate(segments):
            logit(f"--- Processing Segment {i+1}/12 ---")
            state["status"] = f"DIRECTING {i+1}/12"
            shot_a, shot_b = generate_segment_prompts(bible, seg_text)
            seg_path = os.path.join(out_dir, f"seg_{i}")
            os.makedirs(seg_path, exist_ok=True)
            # Audio Generation: Kokoro returns mp3; re-encode to AAC/m4a for the mux.
            audio_res = requests.post(KOKORO_URL, json={"input": seg_text, "voice": voice, "format": "mp3"}, timeout=AI_TIMEOUT)
            clean_m4a = os.path.join(seg_path, "audio.m4a")
            tmp_mp3 = os.path.join(seg_path, "raw.mp3")
            with open(tmp_mp3, "wb") as f: f.write(audio_res.content)
            subprocess.run(["ffmpeg", "-y", "-i", tmp_mp3, "-c:a", "aac", "-b:a", "192k", clean_m4a], capture_output=True)
            duration = get_audio_duration(clean_m4a)
            # Each still shows for half the narration plus 0.5s so the 1s
            # crossfade between the two shots has material to blend.
            img_dur = (duration / 2) + 0.5 # Overlap for transition
            # ComfyUI Image Generation: two stills per segment (shots A and B).
            seg_images = []
            for v, visual_prompt in enumerate([shot_a, shot_b]):
                state["status"] = f"IMAGE {i+1}.{v+1}"
                # Time-derived seed keeps every shot's noise unique per run.
                seed = int(time.time()) + i + v
                # Minimal txt2img workflow in ComfyUI API format; keys are node ids.
                workflow = {
                    "1": {"class_type":"CheckpointLoaderSimple","inputs":{"ckpt_name":model}},
                    "2": {"class_type":"CLIPTextEncode","inputs":{"text":f"{visual_prompt}, vhs horror style, pov, {bible}","clip":["1",1]}},
                    "3": {"class_type":"EmptyLatentImage","inputs":{"width":512,"height":768,"batch_size":1}},
                    "7": {"class_type":"CLIPTextEncode","inputs":{"text":neg,"clip":["1",1]}},
                    "4": {"class_type":"KSampler","inputs":{"seed":seed,"steps":20,"cfg":7,"sampler_name":"euler","scheduler":"normal","denoise":1,"model":["1",0],"positive":["2",0],"negative":["7",0],"latent_image":["3",0]}},
                    "5": {"class_type":"VAEDecode","inputs":{"samples":["4",0],"vae":["1",2]}},
                    "6": {"class_type":"SaveImage","inputs":{"filename_prefix":"S","images":["5",0]}}
                }
                # Optional LoRA: splice node 10 between the checkpoint loader
                # and the sampler/positive-prompt inputs.
                if l1 != "None":
                    workflow["10"] = {"class_type":"LoraLoader","inputs":{"lora_name":l1,"strength_model":1,"strength_clip":1,"model":["1",0],"clip":["1",1]}}
                    workflow["4"]["inputs"]["model"] = ["10", 0]
                    workflow["2"]["inputs"]["clip"] = ["10", 1]
                p_id = requests.post(f"{COMFY_URL}/prompt", json={"prompt": workflow}).json()["prompt_id"]
                # Poll /history until our prompt id shows up, then pull the image.
                while True:
                    hist = requests.get(f"{COMFY_URL}/history").json()
                    if p_id in hist:
                        logit(f"Comfy finished {i}.{v}. Sleeping 3s for disk write...")
                        time.sleep(3) # Safety Delay
                        fn = hist[p_id]["outputs"]["6"]["images"][0]["filename"]
                        img_data = requests.get(f"{COMFY_URL}/view?filename={fn}").content
                        img_fn = os.path.join(seg_path, f"img_{v}.png")
                        with open(img_fn, "wb") as f: f.write(img_data)
                        # Only accept non-empty files; otherwise the shot is skipped.
                        if os.path.exists(img_fn) and os.path.getsize(img_fn) > 0:
                            seg_images.append(img_fn)
                            state["images"].append(img_fn)
                        break
                    time.sleep(2)
            # 3. MUX SEGMENT WITH XFADE: two stills + narration -> one mp4.
            state["status"] = f"MUXING {i+1}/12"
            trans = random.choice(TRANSITIONS)
            seg_video = os.path.join(seg_path, "out.mp4")
            offset = (duration / 2) - 0.5 # Crossfade start time
            filter_complex = (
                f"[0:v]scale=512:768,setsar=1[v0]; "
                f"[1:v]scale=512:768,setsar=1[v1]; "
                f"[v0][v1]xfade=transition={trans}:duration=1:offset={offset},format=yuv420p[v]"
            )
            cmd = [
                "ffmpeg", "-y",
                "-loop", "1", "-t", str(img_dur), "-i", seg_images[0],
                "-loop", "1", "-t", str(img_dur), "-i", seg_images[1],
                "-i", clean_m4a,
                "-filter_complex", filter_complex,
                "-map", "[v]", "-map", "2:a",
                "-c:v", "libx264", "-pix_fmt", "yuv420p", "-shortest", seg_video
            ]
            subprocess.run(cmd, capture_output=True)
            segment_videos.append(seg_video)
        # 4. FINAL ASSEMBLY: concat-demuxer join of all segment mp4s, no re-encode.
        state["status"] = "FINAL ASSEMBLY"
        final_list = os.path.join(out_dir, "list.txt")
        with open(final_list, "w") as f:
            for v in segment_videos:
                f.write(f"file '{os.path.abspath(v)}'\n")
        final_video = os.path.join(out_dir, "COMPLETE_MOVIE.mp4")
        subprocess.run(["ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", final_list, "-c", "copy", final_video])
        state["video_path"] = final_video
        state["status"] = "DONE"
    except Exception as e:
        logit(f"FATAL PIPELINE ERROR: {e}")
        traceback.print_exc()
        state["status"] = "ERROR"
    finally:
        # Always re-enable the UI's start button, even after a crash.
        state["running"] = False
# ============================================================
# WEB UI (FLASK)
# ============================================================
# Single-page terminal-styled UI rendered by Jinja (checkpoints/voices/loras
# dropdowns) and driven client-side by a 2-second /state polling loop.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<title>POV Cinema Studio</title>
<style>
body { background:#0a0a0a; color:#00ff00; font-family:monospace; padding:20px; }
.container { display:flex; gap:20px; }
.panel { flex:1; background:#111; padding:15px; border:1px solid #333; }
select, textarea { width:100%; background:#000; color:#0f0; border:1px solid #0f0; padding:8px; margin-bottom:10px; }
button { width:100%; padding:15px; background:#00ff00; color:#000; font-weight:bold; cursor:pointer; border:none; }
button:disabled { background:#333; color:#666; }
#log { height:100px; overflow-y:auto; font-size:11px; background:#000; padding:10px; border:1px solid #222; margin-top:10px; }
.gallery { display:flex; flex-wrap:wrap; gap:5px; margin-top:10px; }
.thumb { width:65px; height:100px; object-fit:cover; border:1px solid #333; }
video { width:100%; border:1px solid #0f0; margin-top:10px; }
#storybox { white-space:pre-wrap; background:#000; padding:10px; border:1px solid #222; color:#aaa; font-size:12px; height:200px; overflow-y:auto; }
</style>
</head>
<body>
<h1>POV CINEMA STUDIO V6</h1>
<div class="container">
<div class="panel">
PROMPT: <textarea id="prompt" rows="3">A claustrophobic exploration of a flooded industrial basement.</textarea>
MODEL: <select id="model">{% for c in checkpoints %}<option>{{c}}</option>{% endfor %}</select>
VOICE: <select id="voice">{% for v in voices %}<option>{{v}}</option>{% endfor %}</select>
LORA: <select id="l1">{% for l in loras %}<option>{{l}}</option>{% endfor %}</select>
NEG: <textarea id="neg">blurry, low quality, text, watermark, cartoon</textarea>
<button id="go" onclick="start()">INITIATE PRODUCTION</button>
</div>
<div class="panel">
STATUS: <span id="status">IDLE</span>
<div id="log"></div>
<div id="video_area"></div>
<div id="gallery" class="gallery"></div>
</div>
</div>
<h3>STORY SCRIPT (CLICHÉ-FREE):</h3>
<div id="storybox">...</div>
<script>
async function start() {
document.getElementById('go').disabled = true;
const data = {
prompt: document.getElementById('prompt').value,
model: document.getElementById('model').value,
voice: document.getElementById('voice').value,
l1: document.getElementById('l1').value,
l2: "None",
neg: document.getElementById('neg').value
};
await fetch('/start', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(data)});
}
setInterval(async () => {
const res = await fetch('/state');
const s = await res.json();
document.getElementById('status').innerText = s.status;
document.getElementById('log').innerText = s.log;
document.getElementById('storybox').innerText = s.story || "Waiting for generation...";
if(!s.running) document.getElementById('go').disabled = false;
if(s.images.length > 0) {
document.getElementById('gallery').innerHTML = s.images.map(p => `<img src="/file?path=${encodeURIComponent(p)}" class="thumb">`).join('');
}
if(s.video_path && !document.querySelector('video')) {
document.getElementById('video_area').innerHTML = `<h3>FINAL OUTPUT:</h3><video controls autoplay><source src="/file?path=${encodeURIComponent(s.video_path)}" type="video/mp4"></video>`;
}
}, 2000);
</script>
</body>
</html>
"""
app = Flask(__name__)

@app.route("/")
def index():
    """Render the UI, populating the dropdowns with live ComfyUI asset lists."""
    checkpoints, loras = fetch_comfy_assets()
    return render_template_string(HTML_TEMPLATE, checkpoints=checkpoints, loras=loras, voices=VOICES)
@app.route("/start", methods=["POST"])
def start_pipeline():
    """Launch the generation pipeline on a daemon thread.

    Returns 400 when a run is already active, or when the JSON body is
    missing/lacks a prompt. Optional fields fall back to safe defaults;
    the original body raised KeyError (HTTP 500) on any missing field.
    """
    if state["running"]:
        return "BUSY", 400
    d = request.get_json(silent=True)
    if not d or not d.get("prompt"):
        return "MISSING PROMPT", 400
    threading.Thread(
        target=run_segmented_pipeline,
        args=(
            d["prompt"],
            d.get("model", ""),
            d.get("l1", "None"),
            d.get("l2", "None"),
            d.get("voice", VOICES[0]),
            d.get("neg", ""),
        ),
        daemon=True,
    ).start()
    return "OK"
@app.route("/state")
def get_state():
    """Return the shared pipeline `state` dict as JSON for the UI polling loop."""
    return jsonify(state)
@app.route("/file")
def get_file():
    """Serve a generated asset (image or video) referenced by absolute path.

    Security fix: the original served ANY path from the query string,
    allowing arbitrary file disclosure (e.g. /file?path=/etc/passwd).
    Requests are now rejected unless the resolved path lies inside
    BASE_OUT_DIR.
    """
    p = request.args.get("path", "")
    full = os.path.abspath(p)
    # Reject anything outside the output tree (abspath collapses ../ tricks).
    if not full.startswith(BASE_OUT_DIR + os.sep):
        return "FORBIDDEN", 403
    return send_from_directory(os.path.dirname(full), os.path.basename(full))
if __name__ == "__main__":
    # Listen on all interfaces; the UI is served on port 5051.
    app.run(host="0.0.0.0", port=5051)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment