Skip to content

Commit

Permalink
Merge pull request #2 from supa-thibaud/supa-thibaud-patch-1
Browse files: browse the repository at this point in the history
Update Dockerfile Cuda / flashinfer new versions
  • Loading branch information
supa-thibaud authored Aug 22, 2024
2 parents c81ae67 + e0483fd commit 9f3a399
Showing 1 changed file with 46 additions and 13 deletions.
59 changes: 46 additions & 13 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,52 @@
# Base image: CUDA 12.1.0 "base" variant on Ubuntu 22.04.
# NOTE(review): a second FROM follows two lines below; this line looks like the
# pre-change side of the diff this text was captured from. Confirm which FROM
# is the intended base.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04
# CUDA release interpolated into the base-image tag of the FROM below (default 12.1.1).
ARG CUDA_VERSION=12.1.1
# Base image: CUDA "devel" variant on Ubuntu 20.04, tag selected by CUDA_VERSION.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
# Build flavour; compared against "srt" further down to choose which sglang extras to install.
ARG BUILD_TYPE=all
# Keep apt/debconf from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Install pip from the distribution packages. The apt package index is removed
# in the same layer so it is not baked into the image; the later apt step in
# this file runs its own update before installing anything.
RUN apt-get update -y \
    && apt-get install -y python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Provision the OS toolchain: preseed the tzdata answers, install Python 3.10
# from the deadsnakes PPA, register it as the preferred python3 alternative,
# then bootstrap pip with get-pip.py.
# - apt-get is used instead of apt: apt's command-line interface is not
#   guaranteed stable for scripted use and warns when run non-interactively.
# - curl -fsSL fails the build on an HTTP error instead of saving the error page.
# - get-pip.py is deleted in this same layer so it does not remain in the image.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install software-properties-common -y \
    && add-apt-repository ppa:deadsnakes/ppa -y \
    && apt-get update \
    && apt-get install python3.10 python3.10-dev -y \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 2 \
    && update-alternatives --set python3 /usr/bin/python3.10 \
    && apt-get install python3.10-distutils -y \
    && apt-get install curl git sudo -y \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
    && python3 get-pip.py \
    && rm -f get-pip.py \
    && python3 --version \
    && python3 -m pip --version \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Run ldconfig with the CUDA 12.1 compat directory as its argument.
# NOTE(review): the path is pinned to cuda-12.1 while the base-image tag is
# driven by CUDA_VERSION; confirm the directory exists for other CUDA versions.
RUN ldconfig /usr/local/cuda-12.1/compat/
# Working directory for the instructions that follow.
WORKDIR /sgl-workspace

# Install Python dependencies
# Only the requirements manifest is copied, to /requirements.txt in the image root.
COPY builder/requirements.txt /requirements.txt
# Upgrade pip, then install/upgrade everything listed in /requirements.txt.
# /root/.cache/pip is a BuildKit cache mount: it lives on the build host and is
# reused across builds rather than being written into the image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install --upgrade pip && \
python3 -m pip install --upgrade -r /requirements.txt
# Upgrade the packaging toolchain, then install sglang in editable mode from a
# shallow clone of its repository. BUILD_TYPE=srt installs the "srt" extras;
# any other value installs the "all" extras.
# Every pip call uses --no-cache-dir so no download cache is written into this
# layer (a cache purge in a later layer cannot shrink an earlier one).
RUN python3 -m pip --no-cache-dir install --upgrade pip setuptools wheel html5lib six \
    && git clone --depth=1 https://github.com/sgl-project/sglang.git \
    && cd sglang \
    && if [ "$BUILD_TYPE" = "srt" ]; then \
        python3 -m pip --no-cache-dir install -e "python[srt]"; \
    else \
        python3 -m pip --no-cache-dir install -e "python[all]"; \
    fi

# Install sglang with its "all" extras, then FlashInfer, both via pip.
# Historical note: vLLM is installed from pip again because the issues that
# required building a fork have been fixed, and image-size optimization matters
# less now that caching is used. (Presumably vLLM arrives as a dependency of
# sglang[all]; it is not installed by name here. TODO confirm.)
# FlashInfer is taken from the cu121 / torch2.3 wheel index.
RUN python3 -m pip install "sglang[all]" && \
python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3
# Re-declare CUDA_VERSION inside this stage so the RUN below can read it; an
# ARG declared ahead of FROM is otherwise only visible to FROM lines.
ARG CUDA_VERSION
# Install the FlashInfer wheel that matches the CUDA toolkit of the base image.
# CUDA_VERSION is mapped to a wheel-index identifier once and the identifier is
# interpolated into the index URL, so the pip command is written a single time.
# Supported values: 12.1.1 (cu121), 12.4.1 (cu124), 11.8.0 (cu118); anything
# else aborts the build. For cu118, torch 2.4.0 is first installed from the
# PyTorch cu118 wheel index. --no-cache-dir keeps downloads out of the layer.
RUN case "$CUDA_VERSION" in \
        12.1.1) CUDA_IDENTIFIER=cu121 ;; \
        12.4.1) CUDA_IDENTIFIER=cu124 ;; \
        11.8.0) CUDA_IDENTIFIER=cu118 ;; \
        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac \
    && if [ "$CUDA_IDENTIFIER" = "cu118" ]; then \
        python3 -m pip --no-cache-dir install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu118; \
    fi \
    && python3 -m pip --no-cache-dir install flashinfer -i "https://flashinfer.ai/whl/${CUDA_IDENTIFIER}/torch2.4/"

# Empty pip's cache.
# NOTE(review): files written by earlier layers stay in those layers; removing
# them here only changes the final filesystem view, not the image size.
RUN python3 -m pip cache purge

# Overrides the DEBIAN_FRONTEND=noninteractive value set near the top of the file.
# NOTE(review): "interactive" does not look like one of the documented debconf
# frontends (e.g. dialog, readline, noninteractive); confirm the intended value.
ENV DEBIAN_FRONTEND=interactive

# Setup for Option 2: Building the Image with the Model included
ARG MODEL_NAME=""
Expand Down Expand Up @@ -47,4 +80,4 @@ RUN --mount=type=secret,id=HF_TOKEN,required=false \
fi

# Start the handler
# Exec-form CMD: runs /src/handler.py with python3 directly, without a shell.
CMD ["python3", "/src/handler.py"]
# NOTE(review): identical to the CMD above; only the last CMD in a Dockerfile takes effect.
CMD ["python3", "/src/handler.py"]

0 comments on commit 9f3a399

Please sign in to comment.