Add default-nvidia-tensorrtllm variant
Atinoda committed Jul 26, 2024
1 parent 4676bf2 commit f51fab4
Showing 2 changed files with 14 additions and 0 deletions.
13 changes: 13 additions & 0 deletions Dockerfile
@@ -183,6 +183,19 @@ RUN echo "Nvidia Extended (No AVX2)" > /variant.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]

# Extended with TensorRT-LLM
FROM run_base AS default-nvidia-tensorrtllm
# Copy venv
COPY --from=app_nvidia_x $VIRTUAL_ENV $VIRTUAL_ENV
# Install TensorRT-LLM
RUN apt-get update && apt-get install -y openmpi-bin libopenmpi-dev
RUN pip3 install tensorrt_llm==0.10.0 -U --pre --extra-index-url https://pypi.nvidia.com
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
# Variant parameters
RUN echo "Nvidia Extended (TensorRT-LLM)" > /variant.txt
ENV EXTRA_LAUNCH_ARGS=""
CMD ["python3", "/app/server.py"]


# ROCM
# Base
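For local testing, the new stage can be exercised by pointing a compose service's build target at it; a minimal sketch, assuming the build context is the repository root and using an illustrative service name:

```yaml
services:
  text-generation-webui:          # illustrative service name
    build:
      context: .                  # assumes the repo root containing this Dockerfile
      target: default-nvidia-tensorrtllm
```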
1 change: 1 addition & 0 deletions README.md
@@ -39,6 +39,7 @@ Choose the desired variant by setting the image `:tag` in `docker-compose.yml` u
|---|---|
| `*-nvidia` | CUDA 12.1 inference acceleration. |
| `*-nvidia-noavx2` | CUDA 12.1 inference acceleration with no AVX2 CPU instructions. *Typical use-case is legacy CPU with modern GPU.* |
| `*-nvidia-tensorrtllm` | CUDA 12.1 inference acceleration with the TensorRT-LLM library pre-installed. |
| `*-cpu` | CPU-only inference. *Has become surprisingly fast since the early days!* |
| `*-rocm` | ROCM 5.6 inference acceleration. *Experimental and unstable.* |
| `*-arc` | Intel Arc XPU and oneAPI inference acceleration. **Not compatible with Intel integrated GPU (iGPU).** *Experimental and unstable.* |
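Selecting the new variant follows the same pattern as the other tags; a sketch of the relevant `docker-compose.yml` entry, assuming the image is published as `atinoda/text-generation-webui` (the published image name may differ):

```yaml
services:
  text-generation-webui:
    # Illustrative tag selection for the new variant; adjust the image name as needed
    image: atinoda/text-generation-webui:default-nvidia-tensorrtllm
```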
