Commit 7f64d5c
[TRTLLM][DLC] install ammo with TRTLLM for runtime compilation (#1403)
Qing Lan authored Dec 15, 2023
1 parent e0b97dc commit 7f64d5c
Showing 2 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion serving/docker/scripts/patch_oss_dlc.sh
@@ -6,7 +6,7 @@ PYTHON=$1
 HOME_DIR=/root
 
 pip3 install requests
-curl -f -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip
+curl -f -o ${HOME_DIR}/oss_compliance.zip https://publish.djl.ai/dlc-licenses/oss_compliance.zip
 unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/
 cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance
 chmod +x /usr/local/bin/testOSSCompliance
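As a quick sanity check of the relocated artifact (URL taken from the hunk above; this is only an illustrative reachability test, not part of the commit), -f makes curl fail on an HTTP error and -I fetches just the response headers:

curl -fI https://publish.djl.ai/dlc-licenses/oss_compliance.zip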
15 changes: 11 additions & 4 deletions serving/docker/tensorrt-llm.Dockerfile
@@ -9,7 +9,7 @@
 # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
-ARG version=12.2.2-cudnn8-runtime-ubuntu22.04
+ARG version=12.2.2-cudnn8-devel-ubuntu22.04
 FROM nvidia/cuda:$version
 ARG cuda_version=cu122
 ARG python_version=3.10
@@ -25,6 +25,7 @@ ARG trtllm_toolkit_wheel="https://publish.djl.ai/tensorrt-llm/toolkit/tensorrt_l
 ARG trtllm_wheel="https://djl-ai.s3.amazonaws.com/publish/tensorrt-llm/0.6.1/tensorrt_llm-0.6.1-py3-none-any.whl"
 ARG triton_toolkit_wheel="https://publish.djl.ai/tritonserver/r23.11/tritontoolkit-23.11-py310-none-any.whl"
 ARG pydantic_version=1.10.13
+ARG ammo_version=0.5.0
 EXPOSE 8080
 
 COPY dockerd-entrypoint-with-cuda-compat.sh /usr/local/bin/dockerd-entrypoint.sh
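The new ammo_version ARG above is pinned to 0.5.0 and is consumed by the AMMO install step in the next hunk. As a hedged sketch of how it could be overridden at build time (the image tag and the serving/docker build context are assumptions for illustration, not part of this commit):

docker build -f serving/docker/tensorrt-llm.Dockerfile \
    --build-arg ammo_version=0.5.0 \
    -t tensorrt-llm-local serving/docker

Note that the tar step in the next hunk hardcodes nvidia_ammo-0.5.0.tar.gz, so only the default value works end to end as committed.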
@@ -60,19 +61,25 @@ RUN mv *.deb djl-serving_all.deb || true
 
 # Install OpenMPI and other deps
 ARG DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get install -y wget unzip openmpi-bin libopenmpi-dev libffi-dev git-lfs rapidjson-dev && \
+RUN apt-get update && apt-get install -y g++ wget unzip openmpi-bin libopenmpi-dev libffi-dev git-lfs rapidjson-dev && \
     scripts/install_python.sh ${python_version} && \
     pip3 cache purge && \
     apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
 # Install PyTorch
 RUN pip install torch==${TORCH_VERSION} transformers==${transformers_version} accelerate==${accelerate_version} ${peft_wheel} sentencepiece \
-    mpi4py cuda-python==${cuda_python_version} onnx polygraphy pynvml datasets pydantic==${pydantic_version} && \
+    mpi4py cuda-python==${cuda_python_version} onnx polygraphy pynvml datasets pydantic==${pydantic_version} scipy torchprofile ninja && \
     pip3 cache purge
 
-# Install TensorRT and TRT LLM
+# Install TensorRT and TRT-LLM Deps
 RUN pip install --no-cache-dir --extra-index-url https://pypi.nvidia.com tensorrt==${tensorrtlibs_version} && \
     pip install --no-deps ${trtllm_wheel} && \
+    pyver=$(echo $python_version | awk -F. '{print $1$2}') && \
+    # Download and install the AMMO package from the DevZone.
+    wget https://developer.nvidia.com/downloads/assets/cuda/files/nvidia-ammo/nvidia_ammo-${ammo_version}.tar.gz && \
+    tar -xzf nvidia_ammo-0.5.0.tar.gz && \
+    pip install --no-deps nvidia_ammo-${ammo_version}/nvidia_ammo-${ammo_version}-cp${pyver}-cp${pyver}-linux_x86_64.whl && \
+    rm -rf nvidia_ammo-* && \
+    pip3 cache purge
 
 # download dependencies
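As a reading aid for the AMMO install step above, here is a standalone shell sketch of the wheel-name computation, using the default python_version and ammo_version from the Dockerfile; it is illustrative only and is not part of the commit:

# Reproduce the wheel path that the RUN step installs
python_version=3.10
ammo_version=0.5.0
pyver=$(echo $python_version | awk -F. '{print $1$2}')   # "3.10" -> "310"
echo "nvidia_ammo-${ammo_version}/nvidia_ammo-${ammo_version}-cp${pyver}-cp${pyver}-linux_x86_64.whl"
# -> nvidia_ammo-0.5.0/nvidia_ammo-0.5.0-cp310-cp310-linux_x86_64.whl

The wheel is installed with --no-deps, presumably so the torch and TensorRT versions pinned earlier in the Dockerfile are left untouched.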
