Skip to content

Instantly share code, notes, and snippets.

@apoorvalal
Created February 22, 2026 15:27
Show Gist options
  • Select an option

  • Save apoorvalal/254909efcaab1fc20c38ecd59c745ecb to your computer and use it in GitHub Desktop.

Select an option

Save apoorvalal/254909efcaab1fc20c38ecd59c745ecb to your computer and use it in GitHub Desktop.
EuroSAT + DSPy MIPROv2 (local Qwen3-VL) scripts
#!/usr/bin/env python3
"""Classify N random EuroSAT RGB images using DSPy + local Qwen3-VL endpoint.
- Uses dspy.LM + dspy.Predict so we can later run GEPA.
- Uses a lightweight `FileImage` custom type to send `file://...` image references
(so we avoid base64-encoding every image).
This requires the llama-server to be started with `--media-path` pointing at the
project root (the provided server script does this).
Prereqs
- Download/extract EuroSAT:
/Users/alal/miniforge3/envs/llm/bin/python scripts/download_eurosat.py --dest data
- Start Qwen3-VL llama-server (separate terminal):
PORT=8092 scripts/start_qwen_vlm_server.sh
Default endpoint
- OPENAI_BASE_URL=http://127.0.0.1:8092/v1
- OPENAI_MODEL=qwen3-vl-8b
Outputs
- results/eurosatN_predictions.jsonl
- results/eurosatN_summary.txt
"""
from __future__ import annotations
import argparse
import json
import os
import random
import re
import sys
import urllib.parse
import urllib.request
from dataclasses import dataclass
from pathlib import Path
import dspy
# Avoid base64 bloating by sending file:// paths (server must allow via --media-path)
# Image input that is passed to the model by reference rather than inline.
# `path` is interpolated verbatim after "file://"; nothing in this class
# resolves or validates it.
class FileImage(dspy.Type):
    path: str

    def format(self):
        """Return a one-element list holding an ``image_url`` content part."""
        image_ref = {"url": f"file://{self.path}"}
        content_part = {"type": "image_url", "image_url": image_ref}
        return [content_part]
# tqdm is optional: the progress bar is purely cosmetic, so fall back to a
# pass-through when the package is not installed. Only ImportError is caught
# so that any other failure while importing is not silently hidden.
try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    def tqdm(it, **kwargs):
        """Stand-in for ``tqdm.tqdm``: return *it* unchanged, ignoring options."""
        return it
def _health_url_from_base(base_url: str) -> str:
u = urllib.parse.urlparse(base_url)
if not u.scheme or not u.netloc:
raise ValueError(f"Bad base URL: {base_url}")
return f"{u.scheme}://{u.netloc}/health"
def _preflight_check_server(base_url: str, timeout_s: float = 2.0) -> None:
    """Exit early with a help message when the health endpoint is not reachable.

    Issues one GET against the ``/health`` URL derived from *base_url* and
    returns silently when it answers with status 200.

    Args:
        base_url: API base URL; passed to ``_health_url_from_base``.
        timeout_s: Timeout in seconds handed to ``urlopen``.

    Raises:
        ValueError: Propagated from ``_health_url_from_base`` for a malformed URL.
        SystemExit: With code 2, after printing instructions to stderr, when the
            request fails for any reason or the status is not 200.
    """
    health = _health_url_from_base(base_url)
    try:
        with urllib.request.urlopen(health, timeout=timeout_s) as resp:  # noqa: S310
            status = resp.status
        # A non-200 answer is funnelled through the same handler as a
        # connection failure so the user sees one consistent message.
        if status != 200:
            raise RuntimeError(f"health status={status}")
    except Exception as exc:
        help_parts = [
            "Local VLM server is not reachable.\n",
            f" expected health endpoint: {health}\n",
            f" error: {exc}\n\n",
            "Start it from the project root in a separate terminal:\n",
            " cd ~/Dropbox/1_Research/doodles/gepa_small_vlm\n",
            " PORT=8092 scripts/start_qwen_vlm_server.sh\n",
        ]
        print("".join(help_parts), file=sys.stderr)
        raise SystemExit(2)
def norm_label(s: str) -> str:
    """Normalize a label so directory names and model answers compare equal.

    The text is stripped and lower-cased, every run of characters outside
    ``a-z`` becomes a single underscore, and leading/trailing underscores are
    removed.

    Args:
        s: Raw label text. ``None`` (or any falsy value) is treated as the
            empty string instead of raising.

    Returns:
        The normalized label; ``""`` when nothing but separators remains.
    """
    text = (s or "").strip().lower()
    # One substitution is enough: a maximal run of non-letters (underscores
    # included) collapses to exactly one "_", so no doubled underscores remain.
    return re.sub(r"[^a-z]+", "_", text).strip("_")
@dataclass
class PredRow:
    """One classified image; serialized as a line of the predictions JSONL."""

    path: str  # image path, stored as a string
    true_label: str  # normalized name of the image's parent directory
    pred_label: str  # normalized model answer
    ok: bool  # True when pred_label equals true_label
    raw: str  # model answer before normalization
# Signature for single-image classification: an image plus the allowed labels
# (as one comma-separated string) go in, a single `label` string comes out.
# NOTE(review): DSPy is understood to use the class docstring below as the task
# instructions sent to the model, so treat its wording as prompt text — confirm
# before rephrasing it.
class EuroSATClassifySig(dspy.Signature):
    """Classify a EuroSAT RGB satellite image.
    Choose exactly one label from the provided label_set.
    Reply with ONLY the label.
    """
    # Inputs
    image: FileImage = dspy.InputField(desc="RGB satellite image")
    label_set: str = dspy.InputField(desc="Comma-separated list of allowed labels")
    # Output
    label: str = dspy.OutputField(desc="Must be exactly one of the labels in label_set")
class EuroSATClassifier(dspy.Module):
    """DSPy module that classifies one image against a fixed list of labels."""

    def __init__(self, labels: list[str]):
        """Keep *labels* and pre-join them into the text sent with every call."""
        super().__init__()
        self.classify = dspy.Predict(EuroSATClassifySig)
        self.labels = labels
        self.label_set = ", ".join(labels)

    def forward(self, image: FileImage):
        """Run the predictor on *image* with the stored label set."""
        prediction = self.classify(image=image, label_set=self.label_set)
        return prediction
def list_images(eurosat_root: Path) -> list[Path]:
    """Return every ``*.jpg`` below *eurosat_root*, in sorted order.

    ``Path.rglob`` yields entries in an unspecified, filesystem-dependent
    order. Sorting makes any seeded sampling done on the result reproducible
    across runs and machines.

    Raises:
        FileNotFoundError: If *eurosat_root* does not exist.
        RuntimeError: If no ``.jpg`` file is found underneath it.
    """
    if not eurosat_root.exists():
        raise FileNotFoundError(f"Missing EuroSAT folder: {eurosat_root}")
    imgs = sorted(eurosat_root.rglob("*.jpg"))
    if not imgs:
        raise RuntimeError(f"No .jpg files under: {eurosat_root}")
    return imgs
def infer_labels(eurosat_root: Path) -> list[str]:
    """Return the sorted, de-duplicated normalized names of the class folders.

    Each immediate subdirectory of *eurosat_root* contributes one label via
    ``norm_label``; plain files are ignored.

    Raises:
        RuntimeError: If no subdirectory yields a label.
    """
    names = {
        norm_label(child.name)
        for child in eurosat_root.iterdir()
        if child.is_dir()
    }
    if names:
        return sorted(names)
    raise RuntimeError(f"No class directories found under: {eurosat_root}")
def true_label_from_path(p: Path) -> str:
    """Return the normalized name of the directory that directly contains *p*."""
    class_dir = p.parent
    return norm_label(class_dir.name)
def main() -> None:
    """Classify a random sample of images and write predictions plus a summary.

    Steps: parse CLI options, verify the server is up, seed ``random``, infer
    the label list from the data folder, configure the DSPy LM, classify a
    sample of ``--n`` images, then write ``results/eurosat<N>_predictions.jsonl``
    and ``results/eurosat<N>_summary.txt`` under the project root (the parent
    of this script's directory) and print the summary.

    Output files are written as UTF-8: rows are dumped with
    ``ensure_ascii=False``, so a model answer containing non-ASCII characters
    would otherwise raise ``UnicodeEncodeError`` under a non-UTF-8 locale.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--data-root",
        type=Path,
        default=Path("data/EuroSAT/2750"),
        help="Folder containing class subfolders (EuroSAT/2750)",
    )
    ap.add_argument("--n", type=int, default=20)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--base-url", type=str, default=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:8092/v1"))
    ap.add_argument("--api-key", type=str, default=os.getenv("OPENAI_API_KEY", "local"))
    ap.add_argument("--model", type=str, default=os.getenv("OPENAI_MODEL", "qwen3-vl-8b"))
    ap.add_argument("--max-tokens", type=int, default=20)
    args = ap.parse_args()

    # Fail fast before touching the dataset if the server is down.
    _preflight_check_server(args.base_url)
    random.seed(args.seed)

    # Relative --data-root values are resolved against the project root, not
    # the current working directory.
    project_root = Path(__file__).resolve().parents[1]
    data_root = args.data_root
    if not data_root.is_absolute():
        data_root = project_root / data_root
    labels = infer_labels(data_root)

    # Configure DSPy LM
    lm = dspy.LM(
        model=f"openai/{args.model}",
        api_base=args.base_url,
        api_key=args.api_key,
        temperature=0.0,
        max_tokens=args.max_tokens,
    )
    dspy.configure(lm=lm)

    # Label set is provided via the `label_set` input field in the signature.
    imgs = list_images(data_root)
    sample = random.sample(imgs, k=min(args.n, len(imgs)))

    out_dir = project_root / "results"
    out_dir.mkdir(parents=True, exist_ok=True)
    out_jsonl = out_dir / f"eurosat{len(sample)}_predictions.jsonl"
    out_txt = out_dir / f"eurosat{len(sample)}_summary.txt"

    clf = EuroSATClassifier(labels=labels)
    rows: list[PredRow] = []
    for p in tqdm(sample, total=len(sample), desc="classifying", unit="img"):
        true_label = true_label_from_path(p)
        # The image is referenced by its path relative to the project root.
        rel = p.relative_to(project_root).as_posix()
        pred = clf(image=FileImage(path=rel))
        raw = getattr(pred, "label", "") or ""
        pred_label = norm_label(raw)
        ok = pred_label == true_label
        rows.append(PredRow(path=str(p), true_label=true_label, pred_label=pred_label, ok=ok, raw=raw))

    acc = sum(r.ok for r in rows) / len(rows) if rows else 0.0

    with out_jsonl.open("w", encoding="utf-8") as f:
        for r in rows:
            f.write(json.dumps(r.__dict__, ensure_ascii=False) + "\n")

    summary = (
        f"n={len(rows)} seed={args.seed}\n"
        f"base_url={args.base_url} model={args.model}\n"
        f"labels={labels}\n"
        f"accuracy={acc:.3f}\n"
    )
    out_txt.write_text(summary, encoding="utf-8")
    print(summary)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Download + extract EuroSAT (RGB) dataset.
EuroSAT URL (RGB):
https://madm.dfki.de/files/sentinel/EuroSAT.zip
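Depending on the archive layout, the images unpack either as
`EuroSAT/2750/<ClassName>/*.jpg` or as a bare `2750/<ClassName>/*.jpg`;
both end up under `<dest>/EuroSAT/2750/`.
This script needs only the standard library; `certifi` is used for the CA
bundle when it is installed, and `curl` is a fallback if the urllib download fails.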
"""
from __future__ import annotations
import argparse
import shutil
import ssl
import zipfile
import urllib.request
from pathlib import Path
# certifi is optional: when it is installed, download() builds its SSL context
# from certifi's CA bundle; otherwise the context is left as None. Only
# ImportError is caught so that unrelated failures are not silently hidden.
try:
    import certifi
except ImportError:  # pragma: no cover
    certifi = None

# Default value of the --url option.
EUROSAT_URL = "https://madm.dfki.de/files/sentinel/EuroSAT.zip"
def download(url: str, out_path: Path) -> None:
    """Fetch *url* into *out_path* unless a non-empty file is already there.

    The body is streamed to ``<out_path>.partial`` and only moved into place
    once the transfer has finished, so an interrupted run never leaves a
    truncated file at *out_path*. If the urllib download raises, the transfer
    is retried with the ``curl`` executable.

    Args:
        url: Address of the file to fetch.
        out_path: Final location; parent directories are created as needed.

    Raises:
        subprocess.CalledProcessError: If the curl fallback exits non-zero,
            which (because of ``--fail``) includes HTTP error responses.
        OSError: If curl cannot be started or the file cannot be moved.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    if out_path.exists() and out_path.stat().st_size > 0:
        print(f"[download] already exists: {out_path}")
        return

    print(f"[download] {url} -> {out_path}")
    tmp = out_path.with_suffix(out_path.suffix + ".partial")
    if tmp.exists():
        tmp.unlink()

    # Stream download (more robust than urlretrieve, and easier to customize).
    headers = {
        # Some hosts return 403 to default Python UA.
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
        "Accept": "*/*",
    }
    ctx = None
    if certifi is not None:
        ctx = ssl.create_default_context(cafile=certifi.where())
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, context=ctx) as r, open(tmp, "wb") as f:  # noqa: S310
            shutil.copyfileobj(r, f)
    except Exception as e:
        # Fallback: curl tends to have a more reliable SSL trust store on macOS.
        import subprocess

        print(f"[download] urllib failed ({e}); falling back to curl")
        cmd = [
            "curl",
            # Without --fail curl exits 0 on HTTP 4xx/5xx and saves the error
            # page, which would then be mistaken for the archive on every run.
            "--fail",
            "-L",
            "-A",
            headers["User-Agent"],
            "-o",
            str(tmp),
            url,
        ]
        subprocess.run(cmd, check=True)

    # replace() also overwrites a leftover zero-byte target on platforms where
    # rename() refuses to clobber an existing file.
    tmp.replace(out_path)
def extract(zip_path: Path, dest_dir: Path) -> Path:
    """Unpack *zip_path* into *dest_dir* and return ``dest_dir/EuroSAT/2750``.

    Extraction is skipped when the target folder already holds at least one
    ``.jpg``. An archive that unpacks to a bare ``dest_dir/2750`` is moved to
    ``dest_dir/EuroSAT/2750`` afterwards.

    Raises:
        RuntimeError: If the target folder does not exist after extraction.
        OSError: If a non-empty stale target blocks the final rename.
    """

    def holds_jpgs(folder: Path) -> bool:
        return folder.exists() and any(folder.rglob("*.jpg"))

    dest_dir.mkdir(parents=True, exist_ok=True)
    # The official zip currently extracts to dest_dir/2750/...
    # We normalize to dest_dir/EuroSAT/2750/...
    root = dest_dir / "EuroSAT"
    images_dir = root / "2750"

    if holds_jpgs(images_dir):
        print(f"[extract] already extracted: {images_dir}")
        return images_dir

    print(f"[extract] {zip_path} -> {dest_dir}")
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(dest_dir)

    flat_dir = dest_dir / "2750"
    if holds_jpgs(flat_dir):
        root.mkdir(parents=True, exist_ok=True)
        if images_dir.exists():
            # Conservative cleanup: rmdir only succeeds on an empty directory;
            # anything else is left alone and makes the rename below fail.
            try:
                images_dir.rmdir()
            except OSError:
                pass
        flat_dir.rename(images_dir)

    if not images_dir.exists():
        raise RuntimeError(f"Expected extracted folder not found: {images_dir}")
    return images_dir
def main() -> None:
    """Download the archive into ``--dest``, extract it, and print basic counts."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--dest", type=Path, default=Path("data"), help="Destination directory")
    parser.add_argument("--url", type=str, default=EUROSAT_URL)
    opts = parser.parse_args()

    archive = opts.dest / "EuroSAT.zip"
    download(opts.url, archive)
    images_dir = extract(archive, opts.dest)

    # Counts are for the operator's sanity check only.
    jpg_count = sum(1 for _ in images_dir.rglob("*.jpg"))
    class_count = sum(1 for child in images_dir.iterdir() if child.is_dir())
    print(f"[done] images dir: {images_dir}")
    print(f"[done] classes: {class_count}")
    print(f"[done] jpg files: {jpg_count}")


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Run DSPy MIPROv2 on EuroSAT classification (multimodal).
This is an experiment driver to see whether MIPROv2 (instruction + few-shot selection)
can improve accuracy over the baseline on EuroSAT.
Notes on multimodal:
- We use a lightweight FileImage custom DSPy type that formats to an OpenAI-style
`image_url` content block with a `file://...` URL.
- This requires llama-server to be started with `--media-path` pointing at the project root.
Run order:
1) Ensure data exists:
/Users/alal/miniforge3/envs/llm/bin/python scripts/download_eurosat.py --dest data
2) Start server in another terminal:
cd ~/Dropbox/1_Research/doodles/gepa_small_vlm
PORT=8092 scripts/start_qwen_vlm_server.sh
3) Run this script:
/Users/alal/miniforge3/envs/llm/bin/python scripts/miprov2_eurosat.py
Outputs:
- results/miprov2_eurosat_report.txt
- results/miprov2_eurosat_optimized.json (DSPy program state)
"""
from __future__ import annotations
import argparse
import os
import random
import re
import sys
import urllib.parse
import urllib.request
from pathlib import Path
import dspy
from dspy.teleprompt import MIPROv2
# Image input that is handed to the model as a `file://` reference.
# `path` is inserted after the scheme exactly as given; this class performs
# no resolution or existence check.
class FileImage(dspy.Type):
    path: str

    def format(self):
        """Return a single-item list with one ``image_url`` content part."""
        url = f"file://{self.path}"
        return [{"type": "image_url", "image_url": {"url": url}}]
def _health_url_from_base(base_url: str) -> str:
u = urllib.parse.urlparse(base_url)
if not u.scheme or not u.netloc:
raise ValueError(f"Bad base URL: {base_url}")
return f"{u.scheme}://{u.netloc}/health"
def _preflight_check_server(base_url: str, timeout_s: float = 2.0) -> None:
    """Abort the program with a help message if the health check fails.

    Performs one GET on the ``/health`` URL derived from *base_url*; returns
    ``None`` when the response status is 200.

    Args:
        base_url: API base URL, converted by ``_health_url_from_base``.
        timeout_s: Timeout in seconds passed to ``urlopen``.

    Raises:
        ValueError: Propagated from ``_health_url_from_base`` for a bad URL.
        SystemExit: Code 2, after a message on stderr, for any request error
            or a non-200 status.
    """
    health = _health_url_from_base(base_url)
    try:
        with urllib.request.urlopen(health, timeout=timeout_s) as response:  # noqa: S310
            code = response.status
        # Route an unexpected status through the same handler as a failed
        # connection so there is a single error path.
        if code != 200:
            raise RuntimeError(f"health status={code}")
    except Exception as err:
        sys.stderr.write(
            "Local VLM server is not reachable.\n"
            f" expected health endpoint: {health}\n"
            f" error: {err}\n\n"
            "Start it from the project root in a separate terminal:\n"
            " cd ~/Dropbox/1_Research/doodles/gepa_small_vlm\n"
            " PORT=8092 scripts/start_qwen_vlm_server.sh\n"
            "\n"  # the newline that print() appended in the original form
        )
        raise SystemExit(2)
def norm_label(s: str) -> str:
    """Canonicalize a label for comparison.

    Falsy input counts as the empty string. The text is stripped and
    lower-cased, each run of characters outside ``a-z`` is replaced by one
    underscore, and underscores at either end are dropped.
    """
    lowered = (s or "").strip().lower()
    # A single pass suffices: every maximal run of non-letters (underscores
    # included) becomes one "_", so doubled underscores cannot occur.
    return re.sub(r"[^a-z]+", "_", lowered).strip("_")
def infer_labels(eurosat_root: Path) -> list[str]:
    """List the normalized class-folder names under *eurosat_root*, sorted.

    Only immediate subdirectories count; duplicates after normalization
    collapse to one entry.

    Raises:
        RuntimeError: If there is no subdirectory to derive a label from.
    """
    unique_names = set()
    for entry in eurosat_root.iterdir():
        if entry.is_dir():
            unique_names.add(norm_label(entry.name))
    if not unique_names:
        raise RuntimeError(f"No class directories found under: {eurosat_root}")
    return sorted(unique_names)
def list_images(eurosat_root: Path) -> list[Path]:
    """Return every ``*.jpg`` below *eurosat_root*, in sorted order.

    ``Path.rglob`` yields entries in an unspecified, filesystem-dependent
    order. Sorting makes a seeded shuffle of the result, and therefore any
    split built from it, reproducible across runs and machines.

    Raises:
        RuntimeError: If no ``.jpg`` file is found.
    """
    imgs = sorted(eurosat_root.rglob("*.jpg"))
    if not imgs:
        raise RuntimeError(f"No .jpg files under: {eurosat_root}")
    return imgs
def true_label_from_path(p: Path) -> str:
    """Return the normalized name of the folder that directly contains *p*."""
    folder_name = p.parent.name
    return norm_label(folder_name)
# Signature for the classification step: an image and the allowed labels (one
# comma-separated string) are inputs, a single `label` string is the output.
# NOTE(review): DSPy is understood to use the class docstring below as the task
# instructions (the text an instruction optimizer would start from) — confirm
# before rewording it.
class EuroSATSig(dspy.Signature):
    """Classify a EuroSAT RGB satellite image.
    Choose exactly one label from label_set. Reply with ONLY the label.
    """
    # Inputs
    image: FileImage = dspy.InputField()
    label_set: str = dspy.InputField(desc="Comma-separated list of allowed labels")
    # Output
    label: str = dspy.OutputField(desc="Must be exactly one of the labels in label_set")
class EuroSATProgram(dspy.Module):
    """Single-predictor DSPy program that labels one image per call."""

    def __init__(self, labels: list[str]):
        """Create the predictor and remember *labels* as the default label set."""
        super().__init__()
        self.classify = dspy.Predict(EuroSATSig)
        self.default_label_set = ", ".join(labels)

    def forward(self, image: FileImage, label_set: str | None = None):
        """Classify *image*; an empty or missing *label_set* uses the default."""
        # Keep label_set as an input so optimizers can see/use it, but default it.
        if not label_set:
            label_set = self.default_label_set
        return self.classify(image=image, label_set=label_set)
def make_example(project_root: Path, image_path_abs: Path, label_set: str) -> dspy.Example:
    """Build one labelled example for *image_path_abs*.

    The image is referenced by its POSIX-style path relative to
    *project_root*, and the gold label is taken from the image's parent
    folder. ``image`` and ``label_set`` are marked as inputs.

    Raises:
        ValueError: From ``Path.relative_to`` when the image is not located
            under *project_root*.
    """
    relative = image_path_abs.relative_to(project_root).as_posix()
    example = dspy.Example(
        image=FileImage(path=relative),
        label_set=label_set,
        label=true_label_from_path(image_path_abs),
    )
    return example.with_inputs("image", "label_set")
def acc_metric(gold: dspy.Example, pred: dspy.Prediction, trace=None):
    """Score a prediction: 1.0 if its normalized label matches gold, else 0.0.

    A prediction without a ``label`` attribute is scored as an empty label.
    *trace* is accepted but not used.
    """
    predicted = norm_label(getattr(pred, "label", ""))
    expected = norm_label(gold.label)
    if predicted == expected:
        return 1.0
    return 0.0
def eval_accuracy(program: dspy.Module, dataset: list[dspy.Example]) -> float:
    """Return the fraction of *dataset* that *program* labels correctly.

    Each example is run once, in order, using its declared inputs; labels are
    compared after normalization. An empty dataset yields 0.0.
    """
    if not dataset:
        return 0.0

    def is_hit(example) -> bool:
        output = program(**example.inputs())
        return norm_label(getattr(output, "label", "")) == norm_label(example.label)

    hits = sum(1 for example in dataset if is_hit(example))
    return hits / len(dataset)
def main() -> None:
    """Measure baseline accuracy, optimize with MIPROv2, and write a report.

    Steps: parse CLI options, check the server, seed ``random``, shuffle the
    images into disjoint train/validation slices, score the unoptimized
    program on the validation slice, compile with MIPROv2, score the result
    on the same slice, then save the program state and a text report under
    ``results/`` in the project root.

    NOTE(review): the validation slice is both passed to ``compile()`` and
    used for the final score, so ``optimized_acc`` is not a held-out estimate.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data-root", type=Path, default=Path("data/EuroSAT/2750"))
    parser.add_argument("--train-n", type=int, default=200)
    parser.add_argument("--val-n", type=int, default=200)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--base-url", type=str, default=os.getenv("OPENAI_BASE_URL", "http://127.0.0.1:8092/v1"))
    parser.add_argument("--api-key", type=str, default=os.getenv("OPENAI_API_KEY", "local"))
    parser.add_argument("--model", type=str, default=os.getenv("OPENAI_MODEL", "qwen3-vl-8b"))
    parser.add_argument("--auto", type=str, default="light", choices=["light", "medium", "heavy"])
    parser.add_argument("--max-labeled-demos", type=int, default=4)
    parser.add_argument("--max-bootstrapped-demos", type=int, default=1)
    parser.add_argument("--task-max-tokens", type=int, default=20)
    parser.add_argument("--prompt-max-tokens", type=int, default=256)
    args = parser.parse_args()

    # DSPy MIPROv2 currently errors if max_bootstrapped_demos=0 (empty randint range).
    # So we clamp to at least 1.
    args.max_bootstrapped_demos = max(args.max_bootstrapped_demos, 1)

    # Relative --data-root values are resolved against the project root.
    project_root = Path(__file__).resolve().parents[1]
    if args.data_root.is_absolute():
        data_root = args.data_root
    else:
        data_root = project_root / args.data_root

    _preflight_check_server(args.base_url)
    random.seed(args.seed)

    labels = infer_labels(data_root)
    label_set = ", ".join(labels)

    imgs = list_images(data_root)
    random.shuffle(imgs)
    # Train takes the head of the shuffled list, validation the slice right
    # after it; both are capped by the number of available images.
    train_end = min(args.train_n, len(imgs))
    val_end = min(args.train_n + args.val_n, len(imgs))
    trainset = [make_example(project_root, path, label_set) for path in imgs[:train_end]]
    valset = [make_example(project_root, path, label_set) for path in imgs[train_end:val_end]]

    endpoint = {
        "model": f"openai/{args.model}",
        "api_base": args.base_url,
        "api_key": args.api_key,
    }
    # Task LM: deterministic for evaluation
    task_lm = dspy.LM(**endpoint, temperature=0.0, max_tokens=args.task_max_tokens)
    # Prompt LM: higher temp for instruction proposals
    prompt_lm = dspy.LM(**endpoint, temperature=0.7, max_tokens=args.prompt_max_tokens)

    # Baseline
    with dspy.context(lm=task_lm):
        student = EuroSATProgram(labels=labels)
        base_acc = eval_accuracy(student, valset)

    results_dir = project_root / "results"

    # Optimize with MIPROv2
    optimizer = MIPROv2(
        metric=acc_metric,
        prompt_model=prompt_lm,
        task_model=task_lm,
        auto=args.auto,
        max_labeled_demos=args.max_labeled_demos,
        max_bootstrapped_demos=args.max_bootstrapped_demos,
        seed=args.seed,
        verbose=False,
        track_stats=True,
        log_dir=str(results_dir / "miprov2_logs"),
    )
    optimized = optimizer.compile(
        student,
        trainset=trainset,
        valset=valset,
        max_labeled_demos=args.max_labeled_demos,
        max_bootstrapped_demos=args.max_bootstrapped_demos,
        seed=args.seed,
    )

    with dspy.context(lm=task_lm):
        opt_acc = eval_accuracy(optimized, valset)

    results_dir.mkdir(parents=True, exist_ok=True)
    report = results_dir / "miprov2_eurosat_report.txt"
    state = results_dir / "miprov2_eurosat_optimized.json"
    optimized.save(str(state))

    report_lines = [
        f"seed={args.seed} auto={args.auto}\n",
        f"base_url={args.base_url} model={args.model}\n",
        f"train_n={len(trainset)} val_n={len(valset)}\n",
        f"max_labeled_demos={args.max_labeled_demos} max_bootstrapped_demos={args.max_bootstrapped_demos}\n",
        f"baseline_acc={base_acc:.3f}\n",
        f"optimized_acc={opt_acc:.3f}\n",
        f"saved_state={state}\n",
    ]
    report_text = "".join(report_lines)
    report.write_text(report_text)
    print(report_text)


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
set -euo pipefail

# Launch a local llama.cpp server for Qwen3-VL-8B (multimodal) exposing
# OpenAI-style endpoints.
#
# Needs on disk:
# - model weights: Qwen3VL-8B-Instruct-Q4_K_M.gguf
# - multimodal projector: mmproj-Qwen3VL-8B-Instruct-Q8_0.gguf
#
# Endpoints (OpenAI-style):
# - health: http://127.0.0.1:${PORT}/health
# - chat: http://127.0.0.1:${PORT}/v1/chat/completions (supports image inputs)
#
# Notes:
# - --media-path matters if local images are sent via file:// URLs.
# - the reasoning flags keep the answer in message.content (avoid empty
#   content + reasoning_content).
#
# Every setting below can be overridden from the environment; a variable that
# is unset or empty receives the default shown.

: "${LLAMA_SERVER:=/opt/homebrew/bin/llama-server}"
: "${HOST:=127.0.0.1}"
: "${PORT:=8092}"
: "${MODEL_PATH:=$HOME/tmp/local_llm_qwen_tests/models/Qwen3-VL-8B-Instruct-GGUF/Qwen3VL-8B-Instruct-Q4_K_M.gguf}"
: "${MMPROJ_PATH:=$HOME/tmp/local_llm_qwen_tests/models/Qwen3-VL-8B-Instruct-GGUF/mmproj-Qwen3VL-8B-Instruct-Q8_0.gguf}"

# Allow the server to read dataset images from the directory this script is
# started in (and its subfolders).
: "${MEDIA_PATH:=$PWD}"

: "${CTX:=4096}"
: "${THREADS:=4}"
: "${THREADS_BATCH:=4}"
: "${NGL:=99}"
: "${BATCH:=1024}"
: "${UBATCH:=64}"

echo "Starting Qwen3-VL-8B server on http://${HOST}:${PORT}"
echo " model: ${MODEL_PATH}"
echo " mmproj: ${MMPROJ_PATH}"
echo " media: ${MEDIA_PATH}"

server_args=(
  -m "$MODEL_PATH"
  -mm "$MMPROJ_PATH"
  --alias qwen3-vl-8b
  --host "$HOST"
  --port "$PORT"
  --media-path "$MEDIA_PATH"
  -c "$CTX"
  -t "$THREADS"
  --threads-batch "$THREADS_BATCH"
  -ngl "$NGL"
  -b "$BATCH"
  -ub "$UBATCH"
  --reasoning-format none
  --reasoning-budget 0
  --no-webui
)

# exec replaces this shell, so signals reach the server process directly.
exec "$LLAMA_SERVER" "${server_args[@]}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment