@michaelgold
Last active February 28, 2026 08:33
minimax 2.5 docker compose
```yaml
services:
  minimax:
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    container_name: minimax-m25
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    volumes:
      - ./models:/models
    ports:
      - "8080:8080"
    command: >
      -m /models/minimax-m2.5/minimax-m2.5-Q4_K_M.gguf
      --host 0.0.0.0
      --port 8080
      -ngl 999
      --ctx-size 24576
      --cpu-moe
      --no-warmup
      --parallel 1
      --batch-size 1024
      --ubatch-size 256
      --threads 32
      --threads-batch 32

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    depends_on:
      - minimax
    restart: unless-stopped
    ports:
      - "3000:8080"
    environment:
      # Open WebUI expects an OpenAI-compatible base URL
      - OPENAI_API_BASE_URL=http://minimax:8080/v1
      - OPENAI_API_KEY=dummy
      # optional but nice:
      - WEBUI_AUTH=false
```
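Once the stack is up, you can smoke-test the llama.cpp server directly before opening the WebUI. A minimal sketch, assuming the port mapping above: llama.cpp's server returns 200 from `/health` once the model has loaded, and the `model` field in a chat request is effectively arbitrary for a single-model server.

```shell
# Returns non-zero (and prints nothing) until the model finishes loading
curl -sf http://localhost:8080/health && echo "server ready"

# Minimal OpenAI-compatible chat request against the llama.cpp server
curl -s http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "minimax-m2.5",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 64
  }'
```

If `/health` never comes up, `docker compose logs minimax` usually shows the reason (missing GGUF path or GPU not visible are the common ones).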

michaelgold commented Feb 17, 2026

To bootstrap, install the Hugging Face CLI, authenticate, and download the quantized model:

```shell
curl -LsSf https://hf.co/cli/install.sh | bash
hf auth login
hf download ox-ox/MiniMax-M2.5-GGUF \
  --local-dir models/minimax-m2.5 \
  --include "*.gguf"
```
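The Q4_K_M quantization is large, so it is worth confirming the download landed where the compose file expects it (the path below is the one hard-coded in the `command:` above):

```shell
# List the downloaded GGUF file(s) and their sizes
ls -lh models/minimax-m2.5/*.gguf

# Total size on disk
du -sh models/minimax-m2.5
```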

Then run the stack:

```shell
docker compose up
```

Optional (reduce disk swapping): lower swappiness so the kernel prefers keeping model pages in RAM, then cycle swap off and on to clear anything already swapped out:

```shell
echo 'vm.swappiness=1' | sudo tee /etc/sysctl.d/99-llm.conf
sudo sysctl --system
sudo swapoff -a
sudo swapon -a
```
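To confirm the tuning took effect (Linux only; a sketch, not part of the original gist):

```shell
# Should print 1 after the sysctl change above
cat /proc/sys/vm/swappiness

# Shows active swap devices; empty output means swap did not come back on
swapon --show
```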
