Skip to content

Instantly share code, notes, and snippets.

@timsonner
Created April 30, 2026 06:18
Show Gist options
  • Select an option

  • Save timsonner/15f64a2b69b9a25be7d55535de9889d5 to your computer and use it in GitHub Desktop.

Select an option

Save timsonner/15f64a2b69b9a25be7d55535de9889d5 to your computer and use it in GitHub Desktop.
Rebuilds Unsloth Studio's llama-server with CUDA support on Debian trixie (glibc 2.41+) with kernel 7.0+ and NVIDIA driver 595+
#!/bin/bash
# fix-unsloth-cuda.sh
#
# Rebuilds Unsloth Studio's llama-server with CUDA support on Debian trixie
# (glibc 2.41+) with kernel 7.0+ and NVIDIA driver 595+.
#
# Root causes fixed:
# 1. Unsloth ships a CPU-only llama-server binary
# 2. CUDA 12.x toolkit installer requires gcc <= 13 (system has gcc 14)
# 3. CUDA 12.x math_functions.h missing noexcept on sinpi/cospi,
# conflicting with glibc 2.40+ declarations
#
# Usage:
# sudo ./fix-unsloth-cuda.sh
# sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh   # override a default (also: CUDA_VER, UNSLOTH_DIR)
#
# Requirements:
# - NVIDIA driver already installed and working (nvidia-smi works)
# - Unsloth Studio installed at ~/.unsloth/
# - Internet access
set -e

# --- Configuration -----------------------------------------------------------
# Every ${NAME:-default} value below can be overridden from the environment,
# e.g.  sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh

# Account that invoked sudo (sudo exports SUDO_USER). When the script is run
# from a plain root shell the variable is unset and the historical "user"
# default is kept.
INVOKING_USER="${SUDO_USER:-user}"
# Home directory of that account according to the passwd database; empty when
# the account is unknown. The pipeline's status is cut's, so a failed lookup
# does not trip `set -e`.
INVOKING_HOME=$(getent passwd "$INVOKING_USER" 2>/dev/null | cut -d: -f6)

UNSLOTH_DIR="${UNSLOTH_DIR:-${INVOKING_HOME:-/home/user}/.unsloth}"
LLAMA_DIR="$UNSLOTH_DIR/llama.cpp"
CUDA_VER="${CUDA_VER:-12.6.3}"
# The bundled driver version is part of the installer's file name and URL, so
# it has to be overridable together with CUDA_VER.
CUDA_DRIVER_VER="${CUDA_DRIVER_VER:-560.35.05}"
CUDA_RUN="/tmp/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VER}/local_installers/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
CUDA_INSTALL_DIR="/usr/local/cuda-${CUDA_VER%.*}" # e.g. /usr/local/cuda-12.6
CUDA_ARCH="${CUDA_ARCH:-86}" # RTX 3050 = Ampere = 86; change for other GPUs

# --- Preconditions -----------------------------------------------------------
if [ "$(id -u)" -ne 0 ]; then
  echo "Error: run as root (sudo $0)" >&2
  exit 1
fi

# Fail now rather than after the compiler and CUDA toolkit have been installed:
# without the llama.cpp checkout there is nothing to rebuild.
if [ ! -d "$LLAMA_DIR" ]; then
  echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed?" >&2
  exit 1
fi

# Detect actual Unsloth user (owner of the Unsloth directory); the build is
# later run as this account. Falls back to the invoking account if stat fails.
UNSLOTH_USER=$(stat -c '%U' "$UNSLOTH_DIR" 2>/dev/null || echo "$INVOKING_USER")
echo "=== Step 1: Install gcc-13 / g++-13 (CUDA 12.x host compiler) ==="
# Refresh the package index first so the install does not fail on stale lists.
# Best-effort: an unrelated broken repository should not block the install
# when the existing lists are still good enough.
if ! apt-get update; then
  echo "Warning: apt-get update failed; continuing with the existing package lists." >&2
fi
# noninteractive: never stop on a configuration prompt.
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gcc-13 g++-13
# Show which compiler version was actually installed.
gcc-13 --version | head -1
echo ""
echo "=== Step 2: Download CUDA ${CUDA_VER} toolkit installer ==="

# Succeeds when $1 is a non-empty regular file (not a symlink) owned by root
# or by the account that invoked sudo. The installer is executed as root from
# a predictable path under /tmp, so a file planted by any other account must
# not be reused.
installer_cache_is_trusted() {
  local path=$1 owner
  if [ ! -f "$path" ] || [ -L "$path" ] || [ ! -s "$path" ]; then
    return 1
  fi
  owner=$(stat -c '%u' -- "$path" 2>/dev/null) || return 1
  if [ "$owner" = "0" ]; then
    return 0
  fi
  [ -n "${SUDO_UID:-}" ] && [ "$owner" = "$SUDO_UID" ]
}

# Downloads $CUDA_URL to the path given as $1; non-zero status on any failure.
fetch_installer() {
  if command -v wget >/dev/null 2>&1; then
    wget -O "$1" "$CUDA_URL"
  else
    # --fail: return an error on HTTP 4xx/5xx instead of saving the error page.
    curl --fail -L -o "$1" "$CUDA_URL"
  fi
}

if installer_cache_is_trusted "$CUDA_RUN"; then
  echo "Found cached installer at $CUDA_RUN, skipping download."
else
  if [ -e "$CUDA_RUN" ] || [ -L "$CUDA_RUN" ]; then
    echo "Ignoring empty or untrusted file at $CUDA_RUN; downloading a fresh copy." >&2
    rm -f -- "$CUDA_RUN"
  fi
  echo "Downloading from: $CUDA_URL"
  # Download under a temporary name and rename only on success, so that an
  # interrupted or failed transfer is never mistaken for a cached installer.
  CUDA_RUN_PARTIAL=$(mktemp "${CUDA_RUN}.part.XXXXXX")
  if ! fetch_installer "$CUDA_RUN_PARTIAL"; then
    rm -f -- "$CUDA_RUN_PARTIAL"
    echo "Error: failed to download $CUDA_URL" >&2
    exit 1
  fi
  mv -f -- "$CUDA_RUN_PARTIAL" "$CUDA_RUN"
fi
chmod +x "$CUDA_RUN"
echo ""
echo "=== Step 3: Install CUDA toolkit (toolkit only, no driver) ==="
# --override: bypass gcc version check (system has gcc 14, CUDA supports up to 13)
# The installer runs with --silent, so report a failure explicitly instead of
# letting `set -e` end the script without any message.
if ! "$CUDA_RUN" --silent --toolkit --no-drm --override; then
  # NOTE(review): the runfile installer normally logs to this path — confirm.
  echo "Error: CUDA installer failed; check /var/log/cuda-installer.log" >&2
  exit 1
fi
# Only announce success once the compiler the later steps rely on is present.
if [ ! -x "$CUDA_INSTALL_DIR/bin/nvcc" ]; then
  echo "Error: installer finished but $CUDA_INSTALL_DIR/bin/nvcc is missing." >&2
  exit 1
fi
echo "CUDA installed to $CUDA_INSTALL_DIR"
echo ""
echo "=== Step 4: Patch CUDA math_functions.h (glibc 2.40+ noexcept fix) ==="
# glibc 2.40+ added noexcept to sinpi/cospi/sinpif/cospif.
# CUDA headers lack noexcept, causing cudafe++ to fail with "exception
# specification is incompatible" errors during cmake compiler detection.
MATH_H="$CUDA_INSTALL_DIR/targets/x86_64-linux/include/crt/math_functions.h"
if [ ! -f "$MATH_H" ]; then
  echo "Error: $MATH_H not found — CUDA may not have installed correctly." >&2
  exit 1
fi

# Extended regex for a declaration that still lacks noexcept, e.g.
#   extern __DEVICE_FUNCTIONS_DECL__ ... double sinpi(double x);
# Group 1 is everything up to the function name, group 2 is name(args).
# The non-identifier character before the name keeps longer names that merely
# end in "sinpi"/"cospi" from matching. Matching by content instead of by
# pre-computed line numbers means each declaration is handled on its own:
# a missing, duplicated or already patched one cannot break the others.
UNPATCHED_DECL_RE='^(.*extern __DEVICE_FUNCTIONS_DECL__.*[^[:alnum:]_])(sinpi\(double x\)|sinpif\(float x\)|cospi\(double x\)|cospif\(float x\));'

if grep -Eq "$UNPATCHED_DECL_RE" "$MATH_H"; then
  # Keep a pristine copy next to the header the first time it is modified.
  if [ ! -e "$MATH_H.orig" ]; then
    cp -p -- "$MATH_H" "$MATH_H.orig"
  fi
  # A patched line no longer matches the regex, so re-running is a no-op.
  sed -i -E "s/${UNPATCHED_DECL_RE}/\1\2 noexcept;/" "$MATH_H"
  echo "Patched sinpi/cospi/sinpif/cospif with noexcept in $MATH_H"
else
  echo "Note: sinpi already patched or not found — skipping header patch."
fi
echo ""
echo "=== Step 5: Register CUDA libs with ldconfig ==="
# Drop-in file that adds the toolkit's library directory to the dynamic
# linker's search path; named after the major.minor CUDA version.
CUDA_LD_CONF="/etc/ld.so.conf.d/cuda-${CUDA_VER%.*}.conf"
# Quoted redirect target: an unusual CUDA_VER can no longer be word-split.
printf '%s\n' "$CUDA_INSTALL_DIR/lib64" > "$CUDA_LD_CONF"
# Rebuild the linker cache so the new directory takes effect.
ldconfig
echo "ldconfig updated"
echo ""
echo "=== Step 6: Build llama-server with CUDA ==="
if [ ! -d "$LLAMA_DIR" ]; then
  echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed?" >&2
  exit 1
fi
NVCC="$CUDA_INSTALL_DIR/bin/nvcc"
if [ ! -x "$NVCC" ]; then
  echo "Error: $NVCC not found or not executable." >&2
  exit 1
fi
BUILD_DIR="$LLAMA_DIR/build"
# The previous build is parked here while the new one is attempted.
BUILD_BACKUP="$BUILD_DIR.pre-cuda"
export PATH="$CUDA_INSTALL_DIR/bin:$PATH"

# Configure and build the llama-server target in $BUILD_DIR.
# Both cmake invocations run as the Unsloth user to preserve file ownership.
# Returns non-zero if either step fails.
build_llama_server() {
  # CMAKE_BUILD_TYPE is set at configure time because `--config` below only
  # affects multi-configuration generators.
  sudo -u "$UNSLOTH_USER" env PATH="$PATH" cmake -B "$BUILD_DIR" \
    -S "$LLAMA_DIR" \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_CUDA=ON \
    -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH" \
    -DCMAKE_CUDA_COMPILER="$NVCC" \
    -DCMAKE_CUDA_HOST_COMPILER=g++-13 \
    -DCMAKE_C_COMPILER=gcc-13 \
    -DCMAKE_CXX_COMPILER=g++-13 \
  && sudo -u "$UNSLOTH_USER" env PATH="$PATH" cmake \
    --build "$BUILD_DIR" \
    --config Release \
    -j"$(nproc)" \
    --target llama-server
}

# Start from an empty build directory, but do not destroy the existing build
# until the new one has succeeded.
if [ -d "$BUILD_BACKUP" ]; then
  # Left over from an interrupted run: the backup is the last known-good
  # build, so keep it and discard whatever partial build replaced it.
  rm -rf -- "${BUILD_DIR:?}"
elif [ -d "$BUILD_DIR" ]; then
  mv -- "$BUILD_DIR" "$BUILD_BACKUP"
fi

if ! build_llama_server; then
  echo "Error: CUDA build of llama-server failed." >&2
  rm -rf -- "${BUILD_DIR:?}"
  if [ -d "$BUILD_BACKUP" ]; then
    mv -- "$BUILD_BACKUP" "$BUILD_DIR"
    echo "Restored the previous build directory at $BUILD_DIR." >&2
  fi
  exit 1
fi
# The new build succeeded; the parked copy is no longer needed.
rm -rf -- "${BUILD_BACKUP:?}"
echo ""
echo "=== Step 7: Verify ==="
BINARY="$BUILD_DIR/bin/llama-server"
if [ ! -f "$BINARY" ]; then
  echo "Error: build did not produce $BINARY" >&2
  exit 1
fi
# Resolve the binary's shared-library dependencies once and reuse the result.
# `|| true`: an ldd failure should end in the warning below, not abort here.
LDD_OUTPUT=$(ldd "$BINARY" || true)
# Lines to show to the user (may be empty).
CUDA_LIB_LINES=$(grep -iE 'cuda|cublas' <<<"$LDD_OUTPUT" || true)
# Require BOTH libraries by name. Counting matching lines is not enough:
# libcublas and libcublasLt alone would already add up to two lines.
if [[ "$LDD_OUTPUT" == *libcudart* && "$LDD_OUTPUT" == *libcublas* ]]; then
  echo "CUDA libs linked: OK"
  printf '%s\n' "$CUDA_LIB_LINES"
else
  # Deliberately a warning only: the script still prints its summary.
  echo "Warning: expected CUDA libs not found in binary." >&2
  if [ -n "$CUDA_LIB_LINES" ]; then
    printf '%s\n' "$CUDA_LIB_LINES"
  fi
fi
echo ""
# Print a human-readable GPU family for the CUDA architecture number in $1.
# Only the values this script mentions are known; anything else (including
# lists or special values) is reported as unrecognized rather than mislabeled.
cuda_arch_family() {
  case "$1" in
    75) echo "Turing" ;;
    86) echo "Ampere (RTX 3050/3060/3070/3080/3090/A-series)" ;;
    89) echo "Ada" ;;
    90) echo "Hopper" ;;
    *) echo "unrecognized by this script; make sure it matches your GPU" ;;
  esac
}

echo "=== Done ==="
echo ""
echo "The new CUDA-enabled llama-server is at:"
echo " $BINARY"
echo ""
echo "Restart Unsloth Studio, load a model, then verify GPU usage with:"
echo " nvidia-smi --query-compute-apps=pid,used_memory --format=csv"
echo ""
echo "Notes:"
# Describe the architecture that was actually used, not always the default.
echo " - CUDA arch $CUDA_ARCH = $(cuda_arch_family "$CUDA_ARCH")"
echo " For other GPUs: Turing=75, Ada=89, Hopper=90"
# The assignment goes after `sudo`: variables set in front of sudo are dropped
# by its default environment reset and would never reach this script.
echo " Override with: sudo CUDA_ARCH=xx $0"
echo " - CUDA toolkit installed to: $CUDA_INSTALL_DIR"
# Report the header's real state instead of assuming the patch ran.
if grep -q 'sinpi(double x) noexcept;' "$MATH_H" 2>/dev/null; then
  echo " - Header patch applied: $MATH_H"
else
  echo " - Header patch NOT applied (see Step 4 output): $MATH_H"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment