Install TGI
# docker pull ghcr.io/huggingface/text-generation-inference:latest
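#
# Alternative: skip the source build entirely and run the published Docker
# image (a sketch based on the TGI README; the model id is only an example):
# docker run --gpus all --shm-size 1g -p 8080:80 -v $PWD/data:/data \
#     ghcr.io/huggingface/text-generation-inference:latest \
#     --model-id mistralai/Mistral-7B-Instruct-v0.2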
# install cmake v3.29.2 (the version fetched below)
# sudo apt remove cmake -y
# sudo rm -rf kineto
# sudo rm -rf cmake-3.29.2.tar.gz cmake-3.29.2/
sudo rm -rf text-generation-inference/
tree -L 3
# install openssl headers and build tooling (needed to compile cmake and TGI)
# sudo apt-get install libssl-dev
# sudo apt-get install build-essential
# wget https://github.com/Kitware/CMake/releases/download/v3.29.2/cmake-3.29.2.tar.gz
# tar -zxvf cmake-3.29.2.tar.gz
# cd cmake-3.29.2
# ./bootstrap && make && sudo make install   # bootstrap configures the build; only the install step needs sudo
# export PATH=/usr/local/bin:$PATH
# export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
# cmake --version
# cd ..
# git clone --recursive https://github.com/pytorch/kineto.git
# cd kineto/libkineto
# mkdir build && cd build
# cmake ..
# make
# cd ../../..
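# Optional sanity check that libkineto actually built (assumes the default
# static-library output path; adjust if your build layout differs):
# ls -lh kineto/libkineto/build/libkineto.a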
pip list
pip install torch==2.1.0 torchvision torchaudio
python -c "import torch; print('torch version:', torch.__version__)"
# check that torch was compiled with CUDA support
python -c "import torch; print('torch installaed with cuda?', torch.cuda.is_available())" | |
# install text-generation-inference
# Clone the text-generation-inference repository
echo "git clone https://github.com/huggingface/text-generation-inference.git"
git clone https://github.com/huggingface/text-generation-inference.git
# Navigate to the cloned repository
echo "cd text-generation-inference"
cd text-generation-inference
# Set BUILD_EXTENSIONS to True so make also builds the custom CUDA kernels
echo "export BUILD_EXTENSIONS=True"
export BUILD_EXTENSIONS=True
# Install the required dependencies
echo "make install"
make install
echo "✅ installation complete"
exit # stop here; everything below is optional server-side kernel setup, run manually if needed
# Navigate to the server directory
echo "cd server"
cd server
# Get the current directory path and store it in the this_dir variable
echo "this_dir=\$(pwd)"
this_dir=$(pwd)
echo "This dir: $this_dir"
# List the installed Python packages
echo "pip list"
pip list
# # Remove existing directories (if any)
# echo "rm -rf flash-attention llm-awq flash-attention-v2 vllm causal-conv1d"
# rm -rf flash-attention llm-awq flash-attention-v2 vllm causal-conv1d
# # Install causal-conv1d and navigate back to the current directory
# echo "make install install-causal-conv1d && cd \$this_dir"
# make install install-causal-conv1d && cd $this_dir
# # Install flash-attention and navigate back to the current directory
# echo "make install install-flash-attention && cd \$this_dir"
# make install install-flash-attention && cd $this_dir
# # Install flash-attention-v2-cuda and navigate back to the current directory
# echo "make install install-flash-attention-v2-cuda && cd \$this_dir"
# make install install-flash-attention-v2-cuda && cd $this_dir
# # Install vllm-cuda and navigate back to the current directory
# echo "make install install-vllm-cuda && cd \$this_dir"
# make install install-vllm-cuda && cd $this_dir
# # Install awq and navigate back to the current directory
# echo "make install install-awq && cd \$this_dir"
# make install install-awq && cd $this_dir
# # Install exllamav2_kernels
# echo "cd exllamav2_kernels && python setup.py install && cd .."
# cd exllamav2_kernels && python setup.py install && cd ..
# # Install mamba_ssm
# echo "pip install mamba_ssm"
# pip install mamba_ssm
# # Navigate back to the parent directory
# echo "cd .."
# cd ..
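# Once the launcher is serving, a quick generate request looks like this
# (sketch from the TGI README; adjust host/port to match your launcher flags):
# curl 127.0.0.1:8080/generate -X POST \
#     -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
#     -H 'Content-Type: application/json'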