timsonner · April 30, 2026 06:18
diff --git a/fix-unsloth-cuda.sh b/fix-unsloth-cuda.sh
 #!/bin/bash
 # fix-unsloth-cuda.sh
 #
 # Rebuilds Unsloth Studio's llama-server with CUDA support on Debian trixie
 # (glibc 2.41+) with kernel 7.0+ and NVIDIA driver 595+.
 #
 # Root causes fixed:
 #   1. Unsloth ships a CPU-only llama-server binary
 #   2. CUDA 12.x toolkit installer requires gcc <= 13 (system has gcc 14)
 #   3. CUDA 12.x math_functions.h missing noexcept on sinpi/cospi,
 #      conflicting with glibc 2.40+ declarations
 #
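 # Usage:
 #   sudo ./fix-unsloth-cuda.sh
 #   sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh   # build for a different GPU architecture
 #
 # Requirements:
 #   - NVIDIA driver already installed and working (nvidia-smi works)
 #   - Unsloth Studio installed (set UNSLOTH_DIR if it is not in the script's default location)
 #   - Internet access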

 # Abort on the first failing command and on any use of an unset variable.
 set -eu

 # --- Configuration -----------------------------------------------------------
 # UNSLOTH_DIR, CUDA_VER, CUDA_DRIVER_VER and CUDA_ARCH may all be overridden
 # from the environment, e.g. `sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh`.

 # Unsloth Studio location. When not given explicitly, prefer the home of the
 # user who invoked sudo (if it contains .unsloth) and otherwise fall back to
 # the historical default.
 if [ -z "${UNSLOTH_DIR:-}" ]; then
    UNSLOTH_DIR="/home/user/.unsloth"
    if [ -n "${SUDO_USER:-}" ]; then
        # Field 6 of the passwd entry is the home directory.
        sudo_home=$(getent passwd "$SUDO_USER" | cut -d: -f6 || true)
        if [ -n "$sudo_home" ] && [ -d "$sudo_home/.unsloth" ]; then
            UNSLOTH_DIR="$sudo_home/.unsloth"
        fi
    fi
 fi
 LLAMA_DIR="$UNSLOTH_DIR/llama.cpp"

 CUDA_VER="${CUDA_VER:-12.6.3}"
 # The runfile name embeds the bundled driver version, so it has to be
 # overridable together with CUDA_VER or the download URL will not exist.
 CUDA_DRIVER_VER="${CUDA_DRIVER_VER:-560.35.05}"
 CUDA_RUN="/tmp/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
 CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VER}/local_installers/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
 CUDA_INSTALL_DIR="/usr/local/cuda-${CUDA_VER%.*}"   # e.g. /usr/local/cuda-12.6
 CUDA_ARCH="${CUDA_ARCH:-86}"   # RTX 3050 = Ampere = 86; change for other GPUs

 # The steps below install packages and write under /usr/local and /etc,
 # so refuse to continue without root privileges.
 if [ "$(id -u)" -ne 0 ]; then
    echo "Error: run as root (sudo $0)" >&2
    exit 1
 fi

 # Fail fast: without the llama.cpp checkout there is nothing to rebuild, and
 # discovering that only after the CUDA download/installation wastes a lot of time.
 if [ ! -d "$LLAMA_DIR" ]; then
    echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed? (set UNSLOTH_DIR to override)" >&2
    exit 1
 fi

 # Detect actual Unsloth user (owner of the Unsloth directory); the build is
 # later run as this user so the produced files keep the right ownership.
 UNSLOTH_USER=$(stat -c '%U' "$UNSLOTH_DIR")
 # stat prints a placeholder when the owning UID has no passwd entry; make sure
 # the name really resolves to an account before handing it to `sudo -u`.
 if ! id "$UNSLOTH_USER" >/dev/null 2>&1; then
    echo "Error: owner of $UNSLOTH_DIR ('$UNSLOTH_USER') is not a valid user account." >&2
    exit 1
 fi

 echo "=== Step 1: Install gcc-13 / g++-13 (CUDA 12.x host compiler) ==="
 # Refresh the package index first so the install does not fail on stale lists.
 # A failing refresh (e.g. one broken third-party repository) is only a warning:
 # the already cached lists may still be good enough.
 apt-get update || echo "Warning: apt-get update failed; continuing with cached package lists." >&2
 # noninteractive: never block on a configuration prompt in an unattended run.
 DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gcc-13 g++-13
 # Show which compiler was actually installed.
 gcc-13 --version | head -n 1

 echo ""
 echo "=== Step 2: Download CUDA ${CUDA_VER} toolkit installer ==="
 # The installer is cached at $CUDA_RUN between runs. To keep an interrupted or
 # failed transfer from being mistaken for a valid cached installer next time,
 # download into a private temporary file and rename it only on success.
 if [ ! -f "$CUDA_RUN" ]; then
    echo "Downloading from: $CUDA_URL"
    CUDA_RUN_TMP=$(mktemp "${CUDA_RUN}.XXXXXX")
    DOWNLOAD_OK=yes
    if command -v wget >/dev/null 2>&1; then
        wget -O "$CUDA_RUN_TMP" "$CUDA_URL" || DOWNLOAD_OK=no
    elif command -v curl >/dev/null 2>&1; then
        # --fail: treat HTTP errors as failures instead of saving the error page.
        curl --fail -L -o "$CUDA_RUN_TMP" "$CUDA_URL" || DOWNLOAD_OK=no
    else
        echo "Error: neither wget nor curl is installed." >&2
        DOWNLOAD_OK=no
    fi
    if [ "$DOWNLOAD_OK" != yes ]; then
        rm -f -- "$CUDA_RUN_TMP"
        echo "Error: failed to download $CUDA_URL" >&2
        exit 1
    fi
    mv -- "$CUDA_RUN_TMP" "$CUDA_RUN"
 else
    echo "Found cached installer at $CUDA_RUN, skipping download."
 fi
 chmod +x "$CUDA_RUN"

 echo ""
 echo "=== Step 3: Install CUDA toolkit (toolkit only, no driver) ==="
 # --toolkit:     install only the toolkit, leaving the existing driver alone
 # --toolkitpath: pin the prefix to the directory the later steps rely on
 # --override:    bypass gcc version check (system has gcc 14, CUDA supports up to 13)
 "$CUDA_RUN" --silent --toolkit --toolkitpath="$CUDA_INSTALL_DIR" --no-drm --override
 # A silent install gives little feedback, so confirm the compiler is really there
 # before reporting success.
 if [ ! -x "$CUDA_INSTALL_DIR/bin/nvcc" ]; then
    echo "Error: nvcc not found in $CUDA_INSTALL_DIR/bin — CUDA installation failed (check the installer log under /var/log)." >&2
    exit 1
 fi
 echo "CUDA installed to $CUDA_INSTALL_DIR"

 echo ""
 echo "=== Step 4: Patch CUDA math_functions.h (glibc 2.40+ noexcept fix) ==="
 # glibc 2.40+ added noexcept to sinpi/cospi/sinpif/cospif.
 # CUDA headers lack noexcept, causing cudafe++ to fail with "exception
 # specification is incompatible" errors during cmake compiler detection.
 MATH_H="$CUDA_INSTALL_DIR/targets/x86_64-linux/include/crt/math_functions.h"

 if [ ! -f "$MATH_H" ]; then
    echo "Error: $MATH_H not found — CUDA may not have installed correctly." >&2
    exit 1
 fi

 # ERE matching any of the four declarations up to (not including) the closing
 # ';'. Matching by pattern instead of by pre-computed line numbers keeps the
 # patch correct across CUDA versions, makes it idempotent (a patched line ends
 # in ") noexcept;" and no longer matches) and also repairs a half-patched file.
 DECL_RE='extern __DEVICE_FUNCTIONS_DECL__.*(sinpi\(double x\)|sinpif\(float x\)|cospi\(double x\)|cospif\(float x\))'

 # grep -c exits non-zero when the count is 0; that is not an error here.
 UNPATCHED=$(grep -Ec "${DECL_RE};" "$MATH_H" || true)

 if [ "$UNPATCHED" -eq 0 ]; then
    echo "Note: no unpatched sinpi/cospi declarations found — skipping header patch."
 else
    sed -i -E "s/(${DECL_RE});/\1 noexcept;/" "$MATH_H"
    # Verify the substitution really took effect before going on to the build.
    REMAINING=$(grep -Ec "${DECL_RE};" "$MATH_H" || true)
    if [ "$REMAINING" -ne 0 ]; then
        echo "Error: failed to patch $REMAINING declaration(s) in $MATH_H" >&2
        exit 1
    fi
    echo "Patched sinpi/cospi/sinpif/cospif with noexcept in $MATH_H ($UNPATCHED declaration(s))"
 fi

 echo ""
 echo "=== Step 5: Register CUDA libs with ldconfig ==="
 # The drop-in file is named after the major.minor CUDA version and holds a
 # single line: the toolkit's lib64 directory.
 LDCONF_FILE="/etc/ld.so.conf.d/cuda-${CUDA_VER%.*}.conf"
 printf '%s\n' "$CUDA_INSTALL_DIR/lib64" > "$LDCONF_FILE"
 # Rebuild the dynamic linker cache so the new directory is picked up.
 ldconfig
 echo "ldconfig updated"

 echo ""
 echo "=== Step 6: Build llama-server with CUDA ==="
 if [ ! -d "$LLAMA_DIR" ]; then
    echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed?" >&2
    exit 1
 fi

 NVCC="$CUDA_INSTALL_DIR/bin/nvcc"
 BUILD_DIR="$LLAMA_DIR/build"

 # Always configure from scratch so no cached CPU-only settings survive.
 # ${VAR:?} aborts instead of running rm -rf on an empty path.
 rm -rf -- "${BUILD_DIR:?}"

 export PATH="$CUDA_INSTALL_DIR/bin:$PATH"

 # Run cmake and build as the Unsloth user to preserve file ownership.
 # PATH is passed through env explicitly because sudo resets the environment.
 # CMAKE_BUILD_TYPE is set at configure time: single-config generators
 # (Makefiles, Ninja) ignore the --config option given to `cmake --build`.
 sudo -u "$UNSLOTH_USER" env PATH="$PATH" cmake -B "$BUILD_DIR" \
    -S "$LLAMA_DIR" \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_CUDA=ON \
    -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH" \
    -DCMAKE_CUDA_COMPILER="$NVCC" \
    -DCMAKE_CUDA_HOST_COMPILER=g++-13 \
    -DCMAKE_C_COMPILER=gcc-13 \
    -DCMAKE_CXX_COMPILER=g++-13

 # Build only the llama-server target, one job per available CPU.
 sudo -u "$UNSLOTH_USER" env PATH="$PATH" cmake \
    --build "$BUILD_DIR" \
    --config Release \
    -j"$(nproc)" \
    --target llama-server

 echo ""
 echo "=== Step 7: Verify ==="
 BINARY="$BUILD_DIR/bin/llama-server"
 if [ ! -f "$BINARY" ]; then
    echo "Error: build did not produce $BINARY" >&2
    exit 1
 fi

 # Resolve the binary's shared-library dependencies once. A failing ldd is
 # tolerated: verification is informational and must not abort the script.
 LDD_OUT=$(ldd "$BINARY" || true)

 # Check each required library on its own. Counting matching lines is not
 # reliable: libcublas and libcublasLt alone already yield two matches even
 # when libcudart is missing. "<name>.so" does not match libcublasLt.so.
 MISSING_LIBS=""
 for REQUIRED_LIB in libcudart libcublas; do
    if ! grep -q "${REQUIRED_LIB}\.so" <<<"$LDD_OUT"; then
        MISSING_LIBS="$MISSING_LIBS $REQUIRED_LIB"
    fi
 done

 if [ -z "$MISSING_LIBS" ]; then
    echo "CUDA libs linked: OK"
 else
    echo "Warning: expected CUDA libs not found in binary:$MISSING_LIBS" >&2
 fi
 # Show whatever CUDA-related libraries are linked; no match is not an error.
 grep -iE 'cuda|cublas' <<<"$LDD_OUT" || true

 # Final summary for the operator: where the binary is, how to confirm that the
 # GPU is used, and how to rebuild for a different GPU generation.
 echo ""
 echo "=== Done ==="
 echo ""
 echo "The new CUDA-enabled llama-server is at:"
 echo "  $BINARY"
 echo ""
 echo "Restart Unsloth Studio, load a model, then verify GPU usage with:"
 echo "  nvidia-smi --query-compute-apps=pid,used_memory --format=csv"
 echo ""
 echo "Notes:"
 # Print the architecture actually used instead of always calling it Ampere.
 echo "  - Built for CUDA arch $CUDA_ARCH (default 86 = Ampere: RTX 3050/3060/3070/3080/3090/A-series)"
 echo "    For other GPUs: Turing=75, Ada=89, Hopper=90"
 # The assignment goes after `sudo`: sudo resets the environment by default, so
 # a variable set in front of it would not reach the script.
 echo "    Override with: sudo CUDA_ARCH=xx $0"
 echo "  - CUDA toolkit installed to: $CUDA_INSTALL_DIR"
 echo "  - Header checked/patched for glibc noexcept: $MATH_H"
	#!/bin/bash
	# fix-unsloth-cuda.sh
	#
	# Rebuilds Unsloth Studio's llama-server with CUDA support on Debian trixie
	# (glibc 2.41+) with kernel 7.0+ and NVIDIA driver 595+.
	#
	# Root causes fixed:
	# 1. Unsloth ships a CPU-only llama-server binary
	# 2. CUDA 12.x toolkit installer requires gcc <= 13 (system has gcc 14)
	# 3. CUDA 12.x math_functions.h missing noexcept on sinpi/cospi,
	# conflicting with glibc 2.40+ declarations
	#
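	# Usage:
	#   sudo ./fix-unsloth-cuda.sh
	#   sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh   # build for a different GPU architecture
	#
	# Requirements:
	#   - NVIDIA driver already installed and working (nvidia-smi works)
	#   - Unsloth Studio installed (set UNSLOTH_DIR if it is not in the script's default location)
	#   - Internet access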

	# Abort on the first failing command and on any use of an unset variable.
	set -eu

	# --- Configuration -----------------------------------------------------------
	# UNSLOTH_DIR, CUDA_VER, CUDA_DRIVER_VER and CUDA_ARCH may all be overridden
	# from the environment, e.g. `sudo CUDA_ARCH=89 ./fix-unsloth-cuda.sh`.

	# Unsloth Studio location. When not given explicitly, prefer the home of the
	# user who invoked sudo (if it contains .unsloth) and otherwise fall back to
	# the historical default.
	if [ -z "${UNSLOTH_DIR:-}" ]; then
	    UNSLOTH_DIR="/home/user/.unsloth"
	    if [ -n "${SUDO_USER:-}" ]; then
	        # Field 6 of the passwd entry is the home directory.
	        sudo_home=$(getent passwd "$SUDO_USER" | cut -d: -f6 || true)
	        if [ -n "$sudo_home" ] && [ -d "$sudo_home/.unsloth" ]; then
	            UNSLOTH_DIR="$sudo_home/.unsloth"
	        fi
	    fi
	fi
	LLAMA_DIR="$UNSLOTH_DIR/llama.cpp"

	CUDA_VER="${CUDA_VER:-12.6.3}"
	# The runfile name embeds the bundled driver version, so it has to be
	# overridable together with CUDA_VER or the download URL will not exist.
	CUDA_DRIVER_VER="${CUDA_DRIVER_VER:-560.35.05}"
	CUDA_RUN="/tmp/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
	CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VER}/local_installers/cuda_${CUDA_VER}_${CUDA_DRIVER_VER}_linux.run"
	CUDA_INSTALL_DIR="/usr/local/cuda-${CUDA_VER%.*}" # e.g. /usr/local/cuda-12.6
	CUDA_ARCH="${CUDA_ARCH:-86}" # RTX 3050 = Ampere = 86; change for other GPUs

	# The steps below install packages and write under /usr/local and /etc,
	# so refuse to continue without root privileges.
	if [ "$(id -u)" -ne 0 ]; then
	    echo "Error: run as root (sudo $0)" >&2
	    exit 1
	fi

	# Fail fast: without the llama.cpp checkout there is nothing to rebuild, and
	# discovering that only after the CUDA download/installation wastes a lot of time.
	if [ ! -d "$LLAMA_DIR" ]; then
	    echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed? (set UNSLOTH_DIR to override)" >&2
	    exit 1
	fi

	# Detect actual Unsloth user (owner of the Unsloth directory); the build is
	# later run as this user so the produced files keep the right ownership.
	UNSLOTH_USER=$(stat -c '%U' "$UNSLOTH_DIR")
	# stat prints a placeholder when the owning UID has no passwd entry; make sure
	# the name really resolves to an account before handing it to `sudo -u`.
	if ! id "$UNSLOTH_USER" >/dev/null 2>&1; then
	    echo "Error: owner of $UNSLOTH_DIR ('$UNSLOTH_USER') is not a valid user account." >&2
	    exit 1
	fi

	echo "=== Step 1: Install gcc-13 / g++-13 (CUDA 12.x host compiler) ==="
	# Refresh the package index first so the install does not fail on stale lists.
	# A failing refresh (e.g. one broken third-party repository) is only a warning:
	# the already cached lists may still be good enough.
	apt-get update || echo "Warning: apt-get update failed; continuing with cached package lists." >&2
	# noninteractive: never block on a configuration prompt in an unattended run.
	DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gcc-13 g++-13
	# Show which compiler was actually installed (a real pipe, not an escaped one).
	gcc-13 --version | head -n 1

	echo ""
	echo "=== Step 2: Download CUDA ${CUDA_VER} toolkit installer ==="
	# The installer is cached at $CUDA_RUN between runs. To keep an interrupted or
	# failed transfer from being mistaken for a valid cached installer next time,
	# download into a private temporary file and rename it only on success.
	if [ ! -f "$CUDA_RUN" ]; then
	    echo "Downloading from: $CUDA_URL"
	    CUDA_RUN_TMP=$(mktemp "${CUDA_RUN}.XXXXXX")
	    DOWNLOAD_OK=yes
	    if command -v wget >/dev/null 2>&1; then
	        wget -O "$CUDA_RUN_TMP" "$CUDA_URL" || DOWNLOAD_OK=no
	    elif command -v curl >/dev/null 2>&1; then
	        # --fail: treat HTTP errors as failures instead of saving the error page.
	        curl --fail -L -o "$CUDA_RUN_TMP" "$CUDA_URL" || DOWNLOAD_OK=no
	    else
	        echo "Error: neither wget nor curl is installed." >&2
	        DOWNLOAD_OK=no
	    fi
	    if [ "$DOWNLOAD_OK" != yes ]; then
	        rm -f -- "$CUDA_RUN_TMP"
	        echo "Error: failed to download $CUDA_URL" >&2
	        exit 1
	    fi
	    mv -- "$CUDA_RUN_TMP" "$CUDA_RUN"
	else
	    echo "Found cached installer at $CUDA_RUN, skipping download."
	fi
	chmod +x "$CUDA_RUN"

	echo ""
	echo "=== Step 3: Install CUDA toolkit (toolkit only, no driver) ==="
	# --toolkit:     install only the toolkit, leaving the existing driver alone
	# --toolkitpath: pin the prefix to the directory the later steps rely on
	# --override:    bypass gcc version check (system has gcc 14, CUDA supports up to 13)
	"$CUDA_RUN" --silent --toolkit --toolkitpath="$CUDA_INSTALL_DIR" --no-drm --override
	# A silent install gives little feedback, so confirm the compiler is really there
	# before reporting success.
	if [ ! -x "$CUDA_INSTALL_DIR/bin/nvcc" ]; then
	    echo "Error: nvcc not found in $CUDA_INSTALL_DIR/bin — CUDA installation failed (check the installer log under /var/log)." >&2
	    exit 1
	fi
	echo "CUDA installed to $CUDA_INSTALL_DIR"

	echo ""
	echo "=== Step 4: Patch CUDA math_functions.h (glibc 2.40+ noexcept fix) ==="
	# glibc 2.40+ added noexcept to sinpi/cospi/sinpif/cospif.
	# CUDA headers lack noexcept, causing cudafe++ to fail with "exception
	# specification is incompatible" errors during cmake compiler detection.
	MATH_H="$CUDA_INSTALL_DIR/targets/x86_64-linux/include/crt/math_functions.h"

	if [ ! -f "$MATH_H" ]; then
	    echo "Error: $MATH_H not found — CUDA may not have installed correctly." >&2
	    exit 1
	fi

	# ERE matching any of the four declarations up to (not including) the closing
	# ';'. Matching by pattern instead of by pre-computed line numbers keeps the
	# patch correct across CUDA versions, makes it idempotent (a patched line ends
	# in ") noexcept;" and no longer matches) and also repairs a half-patched file.
	DECL_RE='extern __DEVICE_FUNCTIONS_DECL__.*(sinpi\(double x\)|sinpif\(float x\)|cospi\(double x\)|cospif\(float x\))'

	# grep -c exits non-zero when the count is 0; that is not an error here.
	UNPATCHED=$(grep -Ec "${DECL_RE};" "$MATH_H" || true)

	if [ "$UNPATCHED" -eq 0 ]; then
	    echo "Note: no unpatched sinpi/cospi declarations found — skipping header patch."
	else
	    sed -i -E "s/(${DECL_RE});/\1 noexcept;/" "$MATH_H"
	    # Verify the substitution really took effect before going on to the build.
	    REMAINING=$(grep -Ec "${DECL_RE};" "$MATH_H" || true)
	    if [ "$REMAINING" -ne 0 ]; then
	        echo "Error: failed to patch $REMAINING declaration(s) in $MATH_H" >&2
	        exit 1
	    fi
	    echo "Patched sinpi/cospi/sinpif/cospif with noexcept in $MATH_H ($UNPATCHED declaration(s))"
	fi

	echo ""
	echo "=== Step 5: Register CUDA libs with ldconfig ==="
	# The drop-in file is named after the major.minor CUDA version and holds a
	# single line: the toolkit's lib64 directory.
	LDCONF_FILE="/etc/ld.so.conf.d/cuda-${CUDA_VER%.*}.conf"
	printf '%s\n' "$CUDA_INSTALL_DIR/lib64" > "$LDCONF_FILE"
	# Rebuild the dynamic linker cache so the new directory is picked up.
	ldconfig
	echo "ldconfig updated"

	echo ""
	echo "=== Step 6: Build llama-server with CUDA ==="
	if [ ! -d "$LLAMA_DIR" ]; then
	    echo "Error: $LLAMA_DIR not found. Is Unsloth Studio installed?" >&2
	    exit 1
	fi

	NVCC="$CUDA_INSTALL_DIR/bin/nvcc"
	BUILD_DIR="$LLAMA_DIR/build"

	# Always configure from scratch so no cached CPU-only settings survive.
	# ${VAR:?} aborts instead of running rm -rf on an empty path.
	rm -rf -- "${BUILD_DIR:?}"

	export PATH="$CUDA_INSTALL_DIR/bin:$PATH"

	# Run cmake and build as the Unsloth user to preserve file ownership.
	# PATH is passed through env explicitly because sudo resets the environment.
	# CMAKE_BUILD_TYPE is set at configure time: single-config generators
	# (Makefiles, Ninja) ignore the --config option given to `cmake --build`.
	sudo -u "$UNSLOTH_USER" env PATH="$PATH" cmake -B "$BUILD_DIR" \
	    -S "$LLAMA_DIR" \
	    -DCMAKE_BUILD_TYPE=Release \
	    -DGGML_CUDA=ON \
	    -DCMAKE_CUDA_ARCHITECTURES="$CUDA_ARCH" \
	    -DCMAKE_CUDA_COMPILER="$NVCC" \
	    -DCMAKE_CUDA_HOST_COMPILER=g++-13 \
	    -DCMAKE_C_COMPILER=gcc-13 \
	    -DCMAKE_CXX_COMPILER=g++-13

	# Build only the llama-server target, one job per available CPU.
	sudo -u "$UNSLOTH_USER" env PATH="$PATH" cmake \
	    --build "$BUILD_DIR" \
	    --config Release \
	    -j"$(nproc)" \
	    --target llama-server

	echo ""
	echo "=== Step 7: Verify ==="
	BINARY="$BUILD_DIR/bin/llama-server"
	if [ ! -f "$BINARY" ]; then
	    echo "Error: build did not produce $BINARY" >&2
	    exit 1
	fi

	# Resolve the binary's shared-library dependencies once. A failing ldd is
	# tolerated: verification is informational and must not abort the script.
	LDD_OUT=$(ldd "$BINARY" || true)

	# Check each required library on its own. Counting matching lines is not
	# reliable: libcublas and libcublasLt alone already yield two matches even
	# when libcudart is missing. "<name>.so" does not match libcublasLt.so.
	MISSING_LIBS=""
	for REQUIRED_LIB in libcudart libcublas; do
	    if ! grep -q "${REQUIRED_LIB}\.so" <<<"$LDD_OUT"; then
	        MISSING_LIBS="$MISSING_LIBS $REQUIRED_LIB"
	    fi
	done

	if [ -z "$MISSING_LIBS" ]; then
	    echo "CUDA libs linked: OK"
	else
	    echo "Warning: expected CUDA libs not found in binary:$MISSING_LIBS" >&2
	fi
	# Show whatever CUDA-related libraries are linked; no match is not an error.
	grep -iE 'cuda|cublas' <<<"$LDD_OUT" || true

	# Final summary for the operator: where the binary is, how to confirm that the
	# GPU is used, and how to rebuild for a different GPU generation.
	echo ""
	echo "=== Done ==="
	echo ""
	echo "The new CUDA-enabled llama-server is at:"
	echo "  $BINARY"
	echo ""
	echo "Restart Unsloth Studio, load a model, then verify GPU usage with:"
	echo "  nvidia-smi --query-compute-apps=pid,used_memory --format=csv"
	echo ""
	echo "Notes:"
	# Print the architecture actually used instead of always calling it Ampere.
	echo "  - Built for CUDA arch $CUDA_ARCH (default 86 = Ampere: RTX 3050/3060/3070/3080/3090/A-series)"
	echo "    For other GPUs: Turing=75, Ada=89, Hopper=90"
	# The assignment goes after `sudo`: sudo resets the environment by default, so
	# a variable set in front of it would not reach the script.
	echo "    Override with: sudo CUDA_ARCH=xx $0"
	echo "  - CUDA toolkit installed to: $CUDA_INSTALL_DIR"
	echo "  - Header checked/patched for glibc noexcept: $MATH_H"
No results found