kibotu · April 16, 2026 13:52
diff --git a/run-mlx-server.sh b/run-mlx-server.sh
 #!/bin/bash

 # Server settings. Every value can be supplied through the environment;
 # the ":=" default applies when the variable is unset or empty.
 # Alternative model (disabled): mlx-community/Qwen3-4B-4bit
 : "${MODEL:=mlx-community/gemma-4-e4b-it-4bit}"
 : "${PORT:=8899}"
 : "${TEMP:=0.7}"
 : "${PROMPT_CONC:=2}"
 : "${DECODE_CONC:=2}"

 # Restart pacing, consumed by the restart loop further down.
 FAILURE_THRESHOLD=3      # failure count at which the delay starts to grow
 MAX_DELAY=120            # upper bound for DELAY, in seconds
 INITIAL_DELAY=2          # delay used for the first restarts, in seconds
 CONSECUTIVE_FAILURES=0   # failure counter; nothing has failed yet
 DELAY=$INITIAL_DELAY     # current delay; starts at the initial value

 # Free $PORT before starting the server.
 # Only processes LISTENING on the port are selected: a bare "lsof -i:PORT"
 # also matches sockets that are merely connected to that port number (for
 # example a client of the old server), and those must not be killed.
 # The PID list is captured once so the check and the kill see the same set.
 LISTENER_PIDS=$(lsof -t -iTCP:"$PORT" -sTCP:LISTEN 2>/dev/null)
 if [ -n "$LISTENER_PIDS" ]; then
   echo "Killing existing process on port $PORT..."
   # Unquoted on purpose: lsof prints one PID per line and each must become
   # its own argument. Errors are ignored because a process may already be gone.
   # shellcheck disable=SC2086
   kill -9 $LISTENER_PIDS 2>/dev/null || true
   sleep 1
 fi

 # Report the effective settings, one line each, before the server starts.
 printf '%s\n' \
   "Starting MLX server with:" \
   "  Model: $MODEL" \
   "  Port: $PORT" \
   "  Prompt concurrency: $PROMPT_CONC" \
   "  Decode concurrency: $DECODE_CONC"

 # Supervise the server: run it in the foreground and start it again whenever
 # it exits.
 #
 # Pacing: failures below FAILURE_THRESHOLD restart after the current DELAY;
 # from failure number FAILURE_THRESHOLD on, DELAY doubles before every restart
 # and is capped at MAX_DELAY.
 #
 # A run that lasted at least STABLE_RUNTIME seconds counts as healthy, so the
 # failure counter and DELAY start over. Without this the counter only ever
 # grew, and a single crash after long uptime was throttled like a crash loop.
 STABLE_RUNTIME="${STABLE_RUNTIME:-60}"

 # Ctrl-C should end the supervisor. bash runs this trap once the foreground
 # command has finished, so an interrupted server is not started again.
 trap 'echo "Interrupted, not restarting."; exit 130' INT

 while true; do
   # SECONDS is bash's built-in counter of seconds since the shell started.
   STARTED_AT=$SECONDS

   uv run --with mlx-lm mlx_lm.server \
     --model "$MODEL" \
     --host 0.0.0.0 \
     --port "$PORT" \
     --temp "$TEMP" \
     --prompt-concurrency "$PROMPT_CONC" \
     --decode-concurrency "$DECODE_CONC"

   EXIT_CODE=$?
   RUNTIME=$((SECONDS - STARTED_AT))

   # A long enough run ends the current failure streak.
   if [ "$RUNTIME" -ge "$STABLE_RUNTIME" ]; then
     CONSECUTIVE_FAILURES=0
     DELAY=$INITIAL_DELAY
   fi

   # Plain assignment instead of ((var++)): the arithmetic command returns a
   # non-zero status when the old value is 0.
   CONSECUTIVE_FAILURES=$((CONSECUTIVE_FAILURES + 1))

   # Exponential backoff once the streak reaches the threshold.
   if [ "$CONSECUTIVE_FAILURES" -ge "$FAILURE_THRESHOLD" ]; then
     DELAY=$((DELAY * 2))
     if [ "$DELAY" -gt "$MAX_DELAY" ]; then
       DELAY=$MAX_DELAY
     fi
     echo "Server exited with code $EXIT_CODE ($CONSECUTIVE_FAILURES consecutive failures). Waiting ${DELAY}s before restart..."
   else
     echo "Server exited with code $EXIT_CODE. Restarting in ${DELAY}s..."
   fi

   sleep "$DELAY"
 done
	#!/bin/bash

	# Server settings. Every value can be supplied through the environment;
	# the ":=" default applies when the variable is unset or empty.
	# Alternative model (disabled): mlx-community/Qwen3-4B-4bit
	: "${MODEL:=mlx-community/gemma-4-e4b-it-4bit}"
	: "${PORT:=8899}"
	: "${TEMP:=0.7}"
	: "${PROMPT_CONC:=2}"
	: "${DECODE_CONC:=2}"

	# Restart pacing, consumed by the restart loop further down.
	FAILURE_THRESHOLD=3      # failure count at which the delay starts to grow
	MAX_DELAY=120            # upper bound for DELAY, in seconds
	INITIAL_DELAY=2          # delay used for the first restarts, in seconds
	CONSECUTIVE_FAILURES=0   # failure counter; nothing has failed yet
	DELAY=$INITIAL_DELAY     # current delay; starts at the initial value

	# Free $PORT before starting the server.
	# The escaped "\|" sequences of the rendered copy are gone: they handed
	# literal "|" arguments to lsof instead of building a pipeline.
	# Only processes LISTENING on the port are selected: a bare "lsof -i:PORT"
	# also matches sockets that are merely connected to that port number (for
	# example a client of the old server), and those must not be killed.
	# The PID list is captured once so the check and the kill see the same set.
	LISTENER_PIDS=$(lsof -t -iTCP:"$PORT" -sTCP:LISTEN 2>/dev/null)
	if [ -n "$LISTENER_PIDS" ]; then
	  echo "Killing existing process on port $PORT..."
	  # Unquoted on purpose: lsof prints one PID per line and each must become
	  # its own argument. Errors are ignored because a process may already be gone.
	  # shellcheck disable=SC2086
	  kill -9 $LISTENER_PIDS 2>/dev/null || true
	  sleep 1
	fi

	# Report the effective settings, one line each, before the server starts.
	# Detail lines are indented by two spaces under the heading.
	printf '%s\n' \
	  "Starting MLX server with:" \
	  "  Model: $MODEL" \
	  "  Port: $PORT" \
	  "  Prompt concurrency: $PROMPT_CONC" \
	  "  Decode concurrency: $DECODE_CONC"

	# Supervise the server: run it in the foreground and start it again whenever
	# it exits.
	#
	# Pacing: failures below FAILURE_THRESHOLD restart after the current DELAY;
	# from failure number FAILURE_THRESHOLD on, DELAY doubles before every restart
	# and is capped at MAX_DELAY.
	#
	# A run that lasted at least STABLE_RUNTIME seconds counts as healthy, so the
	# failure counter and DELAY start over. Without this the counter only ever
	# grew, and a single crash after long uptime was throttled like a crash loop.
	STABLE_RUNTIME="${STABLE_RUNTIME:-60}"

	# Ctrl-C should end the supervisor. bash runs this trap once the foreground
	# command has finished, so an interrupted server is not started again.
	trap 'echo "Interrupted, not restarting."; exit 130' INT

	while true; do
	  # SECONDS is bash's built-in counter of seconds since the shell started.
	  STARTED_AT=$SECONDS

	  uv run --with mlx-lm mlx_lm.server \
	    --model "$MODEL" \
	    --host 0.0.0.0 \
	    --port "$PORT" \
	    --temp "$TEMP" \
	    --prompt-concurrency "$PROMPT_CONC" \
	    --decode-concurrency "$DECODE_CONC"

	  EXIT_CODE=$?
	  RUNTIME=$((SECONDS - STARTED_AT))

	  # A long enough run ends the current failure streak.
	  if [ "$RUNTIME" -ge "$STABLE_RUNTIME" ]; then
	    CONSECUTIVE_FAILURES=0
	    DELAY=$INITIAL_DELAY
	  fi

	  # Plain assignment instead of ((var++)): the arithmetic command returns a
	  # non-zero status when the old value is 0.
	  CONSECUTIVE_FAILURES=$((CONSECUTIVE_FAILURES + 1))

	  # Exponential backoff once the streak reaches the threshold.
	  if [ "$CONSECUTIVE_FAILURES" -ge "$FAILURE_THRESHOLD" ]; then
	    DELAY=$((DELAY * 2))
	    if [ "$DELAY" -gt "$MAX_DELAY" ]; then
	      DELAY=$MAX_DELAY
	    fi
	    echo "Server exited with code $EXIT_CODE ($CONSECUTIVE_FAILURES consecutive failures). Waiting ${DELAY}s before restart..."
	  else
	    echo "Server exited with code $EXIT_CODE. Restarting in ${DELAY}s..."
	  fi

	  sleep "$DELAY"
	done
No results found