diff --git a/.devcontainer/Dockerfile.trtllm b/.devcontainer/Dockerfile.trtllm deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/.devcontainer/Dockerfile_trtllm b/.devcontainer/Dockerfile_trtllm new file mode 100644 index 00000000000..21b7114ce03 --- /dev/null +++ b/.devcontainer/Dockerfile_trtllm @@ -0,0 +1,75 @@ +ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real" +ARG OMPI_VERSION="4.1.7rc1" + +# Build dependencies resolver stage +FROM lukemathwalker/cargo-chef:latest AS chef +WORKDIR /usr/src/text-generation-inference/backends/trtllm + +FROM chef AS planner +COPY . . +RUN cargo chef prepare --recipe-path recipe.json + +# CUDA dependent dependencies resolver stage +FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder + +RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ + --mount=type=cache,target=/var/lib/apt,sharing=locked \ + apt update && apt install -y \ + build-essential \ + cmake \ + curl \ + gcc-14 \ + g++-14 \ + git \ + git-lfs \ + libssl-dev \ + libucx-dev \ + ninja-build \ + pkg-config \ + pipx \ + python3 \ + python3-dev \ + python3-setuptools \ + tar \ + wget && \ + pipx ensurepath + +ENV TGI_INSTALL_PREFIX=/usr/local/tgi +ENV TENSORRT_INSTALL_PREFIX=/usr/local/tensorrt + +# Install OpenMPI +FROM cuda-builder AS mpi-builder +ARG OMPI_VERSION + +ENV OMPI_TARBALL_FILENAME="openmpi-$OMPI_VERSION.tar.bz2" +RUN wget "https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILENAME" -P /opt/src && \ + mkdir /usr/src/mpi && \ + tar -xf "/opt/src/$OMPI_TARBALL_FILENAME" -C /usr/src/mpi --strip-components=1 && \ + cd /usr/src/mpi && \ + ./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --with-slurm && \ + make -j all && \ + make install && \ + rm -rf "/opt/src/$OMPI_TARBALL_FILENAME" + +# Install TensorRT +FROM cuda-builder AS trt-builder +COPY backends/trtllm/scripts/install_tensorrt.sh /opt/install_tensorrt.sh +RUN chmod +x /opt/install_tensorrt.sh && \ + /opt/install_tensorrt.sh + +# Build Backend +FROM cuda-builder AS tgi-builder +WORKDIR /usr/src/text-generation-inference + +# Install Rust +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y && \ + chmod -R a+w /root/.rustup && \ + chmod -R a+w /root/.cargo + +ENV PATH="/root/.cargo/bin:$PATH" +RUN cargo install cargo-chef + +COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt +COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi + +ENV MPI_HOME=/usr/local/mpi \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e69de29bb2d..8b4eb89c8a5 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,19 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/cpp +{ + "name": "CUDA", + "build": { + "dockerfile": "Dockerfile_trtllm", + "context": ".." + }, + "remoteEnv": { + "PATH": "${containerEnv:PATH}:/usr/local/cuda/bin", + "LD_LIBRARY_PATH": "$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64", + "XLA_FLAGS": "--xla_gpu_cuda_data_dir=/usr/local/cuda" + }, + "customizations" : { + "jetbrains" : { + "backend" : "CLion" + } + } +}