Formats the disk, downloads the models, and starts the llama.cpp server.
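To try it on a fresh instance, run it as root (e.g. via cloud-init user data). The device name, S3 bucket, and model path are specific to this setup and will need adjusting. Assuming the script is saved as startup.sh:

sudo bash startup.sh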
#!/bin/bash
set -e

echo "🚀 Startup script running..."

NVME_DISK="/dev/nvme1n1"
MOUNT_POINT="/mnt/nvme"
S3_BUCKET="ep-ai-us-east-1"
MODEL_DIR="/mnt/nvme/models"
MODEL_PATH="$MODEL_DIR/DeepSeek-R1-GGUF/DeepSeek-R1-UD-IQ1_S/DeepSeek-R1-UD-IQ1_S-00001-of-00003.gguf"
LOG_FILE="/var/log/llama_server.log"
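# Everything above is configuration: the NVMe device name varies by instance
# type (check with lsblk), and the bucket/model paths are specific to this setup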
# Check if the disk is already mounted
if mount | grep -q "$MOUNT_POINT"; then
  echo "✅ NVMe disk is already mounted. Skipping format & mount steps."
else
  echo "🔹 NVMe disk not found or not mounted. Setting it up..."
  # Format and mount only if the disk is detected but not mounted
  if lsblk | grep -q "nvme1n1"; then
    # Only format when no filesystem exists yet (blkid exits non-zero if it
    # finds none); this avoids wiping data if a previous mount failed
    if ! blkid "$NVME_DISK" > /dev/null 2>&1; then
      echo "🔹 Formatting NVMe disk: $NVME_DISK"
      mkfs.ext4 "$NVME_DISK"
    fi
    mkdir -p "$MOUNT_POINT"
    mount "$NVME_DISK" "$MOUNT_POINT"
    # nofail keeps boot from hanging if the disk is ever missing;
    # skip the fstab append if an entry is already there
    grep -q "$MOUNT_POINT" /etc/fstab || \
      echo "$NVME_DISK $MOUNT_POINT ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab
  else
    echo "❌ NVMe disk not found. Exiting script."
    exit 1
  fi
fi
# Ensure s4cmd is installed
if ! command -v s4cmd &> /dev/null; then
  echo "🔹 Installing s4cmd..."
  # Refresh package lists first; fresh instances often have stale indexes
  sudo apt-get update
  sudo apt install -y python3-pip
  pip3 install s4cmd
fi
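# pip may install s4cmd to /usr/local/bin or ~/.local/bin depending on the
# user, so the invocation below relies on PATH rather than a hard-coded path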
# Download models only if directory is empty
mkdir -p "$MODEL_DIR"
if [ -z "$(ls -A "$MODEL_DIR")" ]; then
  echo "🔹 Downloading model files from S3..."
  s4cmd get -r "s3://$S3_BUCKET/" "$MODEL_DIR/"
  chown -R ubuntu:ubuntu "$MODEL_DIR"
  echo "✅ Model download complete!"
else
  echo "✅ Model directory is not empty, skipping download."
fi
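# Alternative worth considering: `s4cmd sync` would make re-runs incremental
# instead of the all-or-nothing emptiness check above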
echo "Starting llama server.." | |
/home/ubuntu/llama.cpp/build/bin/llama-server \ | |
--model ${MODEL_PATH} \ | |
--host 0.0.0.0 \ | |
--port 10000 \ | |
--cache-type-k q4_0 \ | |
--n-gpu-layers 15 \ | |
--threads 16 \ | |
--ctx-size 2048 \ | |
--seed 3407 \ | |
--log-file ${LOG_FILE} --log-prefix --log-timestamps | |
echo "β Startup script completed!" |