PrivateGPT Installation on WSL2
# Install CUDA runtime from https://docs.nvidia.com/cuda/wsl-user-guide/index.html
# Install pyenv from https://bgasparotto.com/install-pyenv-ubuntu-debian
# Clone the repo
git clone https://github.com/imartinez/privateGPT
cd privateGPT
# Install Python 3.11
pyenv install 3.11
pyenv local 3.11
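Before installing dependencies, it is worth confirming that the pinned interpreter actually took effect (plain pyenv/python invocations, nothing project-specific):

```shell
# Verify pyenv resolved the project-local interpreter
pyenv version          # should list 3.11.x (set via the .python-version file)
python --version       # should report Python 3.11.x
```
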
# Install Poetry
pip install poetry
# Install dependencies
poetry install --with ui,local
# Download Embedding and LLM models
poetry run python scripts/setup
# To run on GPU (with CUDA 12)
CUDACXX=/usr/local/cuda-12/bin/nvcc CMAKE_ARGS="-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=native" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir --force-reinstall --upgrade
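A quick sanity check after the forced reinstall: the import below only confirms the wheel is intact and reports its version; whether the build is actually CUDA-enabled shows up in llama.cpp's `system_info` log (look for `BLAS = 1`) when a model loads.

```shell
# Confirm the reinstalled wheel imports cleanly and print its version;
# a cuBLAS-enabled build will additionally log "BLAS = 1" at model load time
python - <<'EOF'
import llama_cpp
print("llama-cpp-python", llama_cpp.__version__)
EOF
```
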
# Run the local server
PGPT_PROFILES=local make run
# Note: on a Mac with Metal you should see a ggml_metal_add_buffer log line,
# indicating that the GPU is being used
# Navigate to the UI and try it out!
http://localhost:8001/
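Once the server is up, a quick smoke test from another terminal can replace opening the browser. This assumes the default port 8001; recent privateGPT versions expose a /health route, but check your version if the request 404s:

```shell
# Should return a small JSON status payload if the server is healthy
curl -s http://localhost:8001/health
# Or just confirm the UI responds at all (expect 200)
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8001/
```
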