Last active
March 19, 2024 03:31
-
-
Save Joelkang/db9d0d254973a519d2e6062502beba7a to your computer and use it in GitHub Desktop.
Dockerfile to create a container with the right deps to quantize models with MLC for CUDA 12.1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stage 1: CUDA 12.1 toolchain + Conda + the MLC nightly Python packages.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 AS deps

# Login shell so every subsequent RUN sources /root/.profile
# (picks up the PATH export and "conda activate mlc" added below).
SHELL ["/bin/bash", "--login", "-c"]

# Step 1. Set up Ubuntu.
# apt-get (not apt: apt's CLI is not script-stable), skip recommended
# packages, and drop the package lists in the same layer so they never
# persist in the image.
RUN apt-get update && apt-get install --yes --no-install-recommends \
      ca-certificates \
      git \
      git-lfs \
      ssh \
      vim \
      wget \
    && rm -rf /var/lib/apt/lists/*

# NOTE: libcuda.so.1 doesn't exist in NVIDIA's base image; link the stub file to work around
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so.1

WORKDIR /root

# Step 2. Set up Conda.
# Download, install, and delete the installer in ONE layer -- removing it
# in a later RUN (as before) leaves the ~100 MB script baked into a layer.
RUN wget -O miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \
    && bash miniconda.sh -b -p "/root/conda" \
    && rm -f miniconda.sh
RUN echo "export PATH=/usr/local/cuda/bin/:/root/conda/bin:\$PATH" >> /root/.profile
RUN echo "source /root/conda/etc/profile.d/conda.sh" >> /root/.profile
RUN conda init bash

# Step 3. Set up Python 3.11 in a dedicated env, auto-activated by the
# login shell for every later RUN and for the runtime CMD.
RUN conda create --yes -n mlc python=3.11 && \
    echo "conda activate mlc" >> /root/.profile
# --no-cache-dir keeps pip's wheel cache out of the image layer.
RUN pip install --no-cache-dir --pre mlc-ai-nightly-cu121 mlc-chat-nightly-cu121 -f https://mlc.ai/wheels
# Stage 2: build AutoGPTQ and mlc-llm from source on top of the deps stage.
FROM deps AS compiler

WORKDIR /root

# Target the common datacenter/consumer GPU architectures when compiling
# CUDA kernels from source.
# See https://github.com/PanQiWei/AutoGPTQ/issues/194#issuecomment-1638480640
ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6+PTX;8.9;9.0"

# Nightly PyTorch built against CUDA 12.1 to match the base image.
RUN pip install --no-cache-dir --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
RUN pip install --no-cache-dir gekko protobuf Optimum

# Build AutoGPTQ from source so its kernels honor TORCH_CUDA_ARCH_LIST.
# WORKDIR instead of `RUN cd ...` (hadolint DL3003).
RUN git clone https://github.com/PanQiWei/AutoGPTQ.git
WORKDIR /root/AutoGPTQ
RUN pip install --no-cache-dir -v .

# Build mlc-llm from source (--recursive pulls in its submodules, e.g. TVM).
WORKDIR /root
RUN git clone --recursive https://github.com/mlc-ai/mlc-llm/
WORKDIR /root/mlc-llm
RUN pip install --no-cache-dir -v .
WORKDIR /root

# Build-time defaults, mirrored into ENV so the runtime CMD can read them.
# Override with --build-arg at build time or -e/--env at `docker run`.
ARG QUANTIZATION=q4f16_1
ENV QUANTIZATION=$QUANTIZATION
ARG MODEL_PATH=/models
ENV MODEL_PATH=$MODEL_PATH
ARG MODEL_NAME=vicuna-13b-v1.5
ENV MODEL_NAME=$MODEL_NAME
ARG OUTPUT_PATH=/compiled
ENV OUTPUT_PATH=$OUTPUT_PATH
ARG MAX_SEQ_LEN=4096
ENV MAX_SEQ_LEN=$MAX_SEQ_LEN

# Shell form is deliberate here: the login bash SHELL activates the `mlc`
# conda env and expands the $MODEL_PATH/$MODEL_NAME/... variables at run
# time, neither of which exec (JSON) form would do.
CMD python -m mlc_llm.build \
    --model $MODEL_PATH/$MODEL_NAME \
    --target cuda-multiarch \
    --max-seq-len $MAX_SEQ_LEN \
    --artifact_path $OUTPUT_PATH \
    --quantization $QUANTIZATION
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment