Created
April 30, 2026 06:18
-
-
Save timsonner/15f64a2b69b9a25be7d55535de9889d5 to your computer and use it in GitHub Desktop.
Rebuilds Unsloth Studio's llama-server with CUDA support on Debian trixie (glibc 2.41+) with kernel 7.0+ and NVIDIA driver 595+
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# fix-unsloth-cuda.sh
#
# Rebuilds Unsloth Studio's llama-server with CUDA support on Debian trixie
# (glibc 2.41+) with kernel 7.0+ and NVIDIA driver 595+.
#
# Root causes fixed:
#   1. Unsloth ships a CPU-only llama-server binary
#   2. CUDA 12.x toolkit installer requires gcc <= 13 (system has gcc 14)
#   3. CUDA 12.x math_functions.h missing noexcept on sinpi/cospi,
#      conflicting with glibc 2.40+ declarations
#
# Usage:
#   sudo ./fix-unsloth-cuda.sh
#   sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh    # override a setting
#
# Overridable settings: UNSLOTH_DIR, CUDA_VER, CUDA_DRIVER_VER, CUDA_ARCH.
# Pass them AFTER "sudo" as shown above; variables placed in front of
# "sudo" are normally dropped by sudo's environment reset.
#
# Requirements:
#   - NVIDIA driver already installed and working (nvidia-smi works)
#   - Unsloth Studio installed at ~/.unsloth/
#   - Internet access

# -e: abort on the first failing command; -u: treat unset variables as errors.
set -eu

# Default location of the Unsloth install. Under sudo "~" would be root's
# home, so prefer the invoking user's home when it actually contains
# .unsloth; otherwise keep the historical /home/user default.
default_unsloth_home="/home/user"
if [ -n "${SUDO_USER:-}" ] && command -v getent >/dev/null 2>&1; then
  sudo_home=$(getent passwd "$SUDO_USER" | cut -d: -f6)
  if [ -n "$sudo_home" ] && [ -d "$sudo_home/.unsloth" ]; then
    default_unsloth_home="$sudo_home"
  fi
fi
UNSLOTH_DIR="${UNSLOTH_DIR:-$default_unsloth_home/.unsloth}"
LLAMA_DIR="$UNSLOTH_DIR/llama.cpp"

# Toolkit release to install. The runfile name embeds BOTH the toolkit
# version and the driver version it was bundled with, so the two must be
# overridden together when selecting a different release.
CUDA_VER="${CUDA_VER:-12.6.3}"
CUDA_DRIVER_VER="${CUDA_DRIVER_VER:-560.35.05}"
CUDA_RUN="/tmp/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VER}/local_installers/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
CUDA_INSTALL_DIR="/usr/local/cuda-${CUDA_VER%.*}"   # e.g. /usr/local/cuda-12.6
CUDA_ARCH="${CUDA_ARCH:-86}"   # RTX 3050 = Ampere = 86; change for other GPUs
# The steps below install packages and write under /usr/local and /etc,
# so root is required.
if [ "$(id -u)" -ne 0 ]; then
  echo "Error: run as root (sudo $0)" >&2
  exit 1
fi

# Fail fast: without an Unsloth install there is nothing to rebuild, so stop
# before the multi-gigabyte toolkit download and install.
if [ ! -d "$UNSLOTH_DIR" ]; then
  echo "Error: $UNSLOTH_DIR not found. Is Unsloth Studio installed? (set UNSLOTH_DIR to override)" >&2
  exit 1
fi

# Detect actual Unsloth user (owner of ~/.unsloth). The build is later run
# as this account, so it must resolve to a real user.
UNSLOTH_USER=$(stat -c '%U' "$UNSLOTH_DIR")
if ! id "$UNSLOTH_USER" >/dev/null 2>&1; then
  echo "Error: owner of $UNSLOTH_DIR ('$UNSLOTH_USER') is not a valid user account." >&2
  exit 1
fi
echo "=== Step 1: Install gcc-13 / g++-13 (CUDA 12.x host compiler) ==="
# Refresh the package lists first: installing against stale or missing
# lists fails with "Unable to locate package".
apt-get update

# gcc-13/g++-13 are the CUDA host compilers. cmake and a build tool are
# needed by the build step; add them only when they are not already present.
build_pkgs=(gcc-13 g++-13)
if ! command -v cmake >/dev/null 2>&1; then
  build_pkgs+=(cmake)
fi
if ! command -v make >/dev/null 2>&1 && ! command -v ninja >/dev/null 2>&1; then
  build_pkgs+=(make)
fi

# noninteractive: never block on a debconf prompt in an unattended run.
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${build_pkgs[@]}"
gcc-13 --version | head -1
echo ""
echo "=== Step 2: Download CUDA ${CUDA_VER} toolkit installer ==="
if [ ! -f "$CUDA_RUN" ]; then
  echo "Downloading from: $CUDA_URL"
  # Download to a side file and rename only on success, so an interrupted or
  # failed transfer is never mistaken for a valid cached installer next run.
  CUDA_RUN_PART="${CUDA_RUN}.part"
  if command -v wget >/dev/null 2>&1; then
    wget -O "$CUDA_RUN_PART" "$CUDA_URL"
  elif command -v curl >/dev/null 2>&1; then
    # -f: fail on HTTP errors instead of saving the error page as the installer.
    curl -fL -o "$CUDA_RUN_PART" "$CUDA_URL"
  else
    echo "Error: neither wget nor curl is installed; cannot download $CUDA_URL" >&2
    exit 1
  fi
  mv -f "$CUDA_RUN_PART" "$CUDA_RUN"
else
  echo "Found cached installer at $CUDA_RUN, skipping download."
fi
chmod +x "$CUDA_RUN"
echo ""
printf '%s\n' "=== Step 3: Install CUDA toolkit (toolkit only, no driver) ==="
# Installer flags, kept in one place.
# --override: bypass gcc version check (system has gcc 14, CUDA supports up to 13)
installer_flags=(--silent --toolkit --no-drm --override)
"$CUDA_RUN" "${installer_flags[@]}"
printf 'CUDA installed to %s\n' "$CUDA_INSTALL_DIR"
printf '\n'
echo "=== Step 4: Patch CUDA math_functions.h (glibc 2.40+ noexcept fix) ==="
# glibc 2.40+ added noexcept to sinpi/cospi/sinpif/cospif.
# CUDA headers lack noexcept, causing cudafe++ to fail with "exception
# specification is incompatible" errors during cmake compiler detection.
MATH_H="$CUDA_INSTALL_DIR/targets/x86_64-linux/include/crt/math_functions.h"
if [ ! -f "$MATH_H" ]; then
  echo "Error: $MATH_H not found — CUDA may not have installed correctly." >&2
  exit 1
fi

# Patch each declaration independently and by pattern rather than by line
# number. This stays correct when only some declarations still need the fix
# or when a signature occurs more than once, and it is a no-op on an already
# patched header because the pattern requires ";" right after the parameter
# list.
PATCHED_COUNT=0
for sig in 'sinpi(double x)' 'sinpif(float x)' 'cospi(double x)' 'cospif(float x)'; do
  if grep -q "extern __DEVICE_FUNCTIONS_DECL__.*${sig};" "$MATH_H"; then
    sed -i "/extern __DEVICE_FUNCTIONS_DECL__/s/${sig};/${sig} noexcept;/" "$MATH_H"
    PATCHED_COUNT=$((PATCHED_COUNT + 1))
  fi
done

if [ "$PATCHED_COUNT" -eq 0 ]; then
  echo "Note: sinpi/cospi already patched or not found — skipping header patch."
else
  echo "Patched $PATCHED_COUNT of 4 sinpi/cospi/sinpif/cospif declarations with noexcept in $MATH_H"
fi
echo ""
printf '%s\n' "=== Step 5: Register CUDA libs with ldconfig ==="
# Write the toolkit's lib64 directory into its own ld.so.conf.d entry, then
# run ldconfig so the entry takes effect.
ld_conf_file="/etc/ld.so.conf.d/cuda-${CUDA_VER%.*}.conf"
printf '%s\n' "$CUDA_INSTALL_DIR/lib64" > "$ld_conf_file"
ldconfig
printf '%s\n' "ldconfig updated"
printf '\n'
echo "=== Step 6: Build llama-server with CUDA ==="
if [ ! -d "$LLAMA_DIR" ]; then
  echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed?" >&2
  exit 1
fi

NVCC="$CUDA_INSTALL_DIR/bin/nvcc"
BUILD_DIR="$LLAMA_DIR/build"
BUILD_BACKUP_DIR="$LLAMA_DIR/build.pre-cuda"

# Keep the previous build until the new one has succeeded, so a failed
# compile does not leave Unsloth without any llama-server at all.
if [ -d "$BUILD_BACKUP_DIR" ]; then
  # Left over from an interrupted run: the backup is the last known-good
  # build, and whatever sits in BUILD_DIR is a partial attempt.
  rm -rf "${BUILD_DIR:?}"
elif [ -d "$BUILD_DIR" ]; then
  mv "$BUILD_DIR" "$BUILD_BACKUP_DIR"
fi

export PATH="$CUDA_INSTALL_DIR/bin:$PATH"

# Run a command as the Unsloth user to preserve file ownership. PATH is
# passed explicitly through env so the CUDA bin directory exported above
# reaches the command.
run_as_unsloth_user() {
  sudo -u "$UNSLOTH_USER" env PATH="$PATH" "$@"
}

build_ok=1
# CMAKE_BUILD_TYPE is set at configure time because "--config" is only
# honoured by multi-config generators; Makefile/Ninja builds ignore it.
run_as_unsloth_user cmake -B "$BUILD_DIR" \
  -S "$LLAMA_DIR" \
  -DCMAKE_BUILD_TYPE=Release \
  -DGGML_CUDA=ON \
  -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH" \
  -DCMAKE_CUDA_COMPILER="$NVCC" \
  -DCMAKE_CUDA_HOST_COMPILER=g++-13 \
  -DCMAKE_C_COMPILER=gcc-13 \
  -DCMAKE_CXX_COMPILER=g++-13 || build_ok=0

if [ "$build_ok" -eq 1 ]; then
  run_as_unsloth_user cmake \
    --build "$BUILD_DIR" \
    --config Release \
    -j"$(nproc)" \
    --target llama-server || build_ok=0
fi

if [ "$build_ok" -ne 1 ]; then
  echo "Error: CUDA build of llama-server failed." >&2
  if [ -d "$BUILD_BACKUP_DIR" ]; then
    # Put the previous build back exactly where it was.
    rm -rf "${BUILD_DIR:?}"
    mv "$BUILD_BACKUP_DIR" "$BUILD_DIR"
    echo "Restored previous build at $BUILD_DIR" >&2
  fi
  exit 1
fi

# New build succeeded; the old one is no longer needed.
rm -rf "${BUILD_BACKUP_DIR:?}"
echo ""
echo "=== Step 7: Verify ==="
BINARY="$BUILD_DIR/bin/llama-server"
if [ ! -f "$BINARY" ]; then
  echo "Error: build did not produce $BINARY" >&2
  exit 1
fi

# Capture ldd output once. A non-zero ldd status is tolerated here because
# the checks below decide what to report.
LDD_OUT=$(ldd "$BINARY" || true)

# Require each library individually and ignore "=> not found" entries.
# A plain line count would be satisfied by libcublas plus libcublasLt with no
# libcudart at all, and would count unresolved libraries as linked.
CUDA_LIBS_MISSING=""
for lib in libcudart libcublas; do
  if ! printf '%s\n' "$LDD_OUT" | grep -F "${lib}.so" | grep -vq 'not found'; then
    CUDA_LIBS_MISSING="$CUDA_LIBS_MISSING $lib"
  fi
done

if [ -z "$CUDA_LIBS_MISSING" ]; then
  echo "CUDA libs linked: OK"
else
  echo "Warning: expected CUDA libs not found in binary:$CUDA_LIBS_MISSING" >&2
fi
# Show the CUDA-related entries in both cases; no match is not an error.
printf '%s\n' "$LDD_OUT" | grep -iE 'cuda|cublas' || true
echo ""
# Print the closing summary: where the new binary is, how to verify GPU use,
# and which settings were applied.
# Globals read: BINARY, CUDA_ARCH, CUDA_INSTALL_DIR, MATH_H
# Outputs: writes the summary to stdout
print_summary() {
  local arch_name
  # Name the architecture actually in use rather than always "Ampere".
  case "$CUDA_ARCH" in
    75) arch_name="Turing" ;;
    86) arch_name="Ampere (RTX 3050/3060/3070/3080/3090/A-series)" ;;
    89) arch_name="Ada" ;;
    90) arch_name="Hopper" ;;
    *) arch_name="user-specified architecture" ;;
  esac

  echo "=== Done ==="
  echo ""
  echo "The new CUDA-enabled llama-server is at:"
  echo "  $BINARY"
  echo ""
  echo "Restart Unsloth Studio, load a model, then verify GPU usage with:"
  echo "  nvidia-smi --query-compute-apps=pid,used_memory --format=csv"
  echo ""
  echo "Notes:"
  echo "  - CUDA arch $CUDA_ARCH = $arch_name"
  echo "    For other GPUs: Turing=75, Ada=89, Hopper=90"
  # The variable must follow "sudo": one placed in front of it is normally
  # dropped by sudo's environment reset and would never reach the script.
  echo "    Override with: sudo CUDA_ARCH=xx $0"
  echo "  - CUDA toolkit installed to: $CUDA_INSTALL_DIR"
  echo "  - Header patch applied: $MATH_H"
}

print_summary
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment