forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
40 lines (33 loc) · 1.17 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#---
# name: tensorrt_llm
# group: llm
# config: config.py
# depends: [tensorrt, pytorch, transformers, cuda-python]
# test: [test.py]
# requires: '>=35'
# notes: The `tensorrt-llm:builder` container includes the C++ binaries under `/opt`
#---
# The parent image is chosen by the caller: BASE_IMAGE has no default here, so
# it has to be supplied with --build-arg.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# Build-time inputs that carry no default in this file. ARG values are visible
# as environment variables to the RUN steps of this stage; TRT_LLM_PATCH is
# additionally used as the source path of a COPY further down.
# NOTE(review): presumably these are filled in by config.py (see the header) and
# read by install.sh / build.sh -- confirm against those files.
ARG TRT_LLM_VERSION \
    TRT_LLM_BRANCH \
    TRT_LLM_SOURCE \
    TRT_LLM_PATCH \
    CUDA_ARCHS \
    CUDA_VERSION

# Settings with defaults; each one can still be overridden with --build-arg.
# GIT_PATCHES, TMP_DIR and SOURCE_DIR are used as COPY destinations below.
ARG FORCE_BUILD="off" \
    BUILD_DIR="/opt/TensorRT-LLM/cpp/build" \
    SOURCE_DIR="/opt/TensorRT-LLM" \
    SOURCE_TAR="/tmp/TensorRT-LLM/source.tar.gz" \
    GIT_PATCHES="/tmp/TensorRT-LLM/patch.diff" \
    TMP_DIR="/tmp/TensorRT-LLM/"
# Stage the build inputs inside the image:
#   - the patch file named by TRT_LLM_PATCH lands at ${GIT_PATCHES}
#   - the contents of sources/ land in ${TMP_DIR}
#   - the two setup scripts land in /tmp/setup/
COPY ${TRT_LLM_PATCH} ${GIT_PATCHES}
COPY sources/ ${TMP_DIR}
COPY build.sh install.sh /tmp/setup/

# Run install.sh first; build.sh runs only when install.sh exits non-zero.
# A trailing `|| echo "BUILD FAILED!"` fallback is left disabled, so a failing
# build.sh fails this RUN step (and with it the image build).
RUN /tmp/setup/install.sh \
    || /tmp/setup/build.sh
# Upgrade transformers, then add the uvicorn and fastapi packages.
# Both pip invocations pass --no-cache-dir so pip's download/wheel cache is not
# written into this image layer. The upgrade stays a separate command so that
# --upgrade applies to transformers only.
# NOTE(review): none of these packages are version-pinned, so rebuilding at a
# different time can resolve different versions -- consider pinning.
RUN pip3 install --no-cache-dir --upgrade transformers && \
    pip3 install --verbose --no-cache-dir uvicorn fastapi
# Place the llama.sh helper script in the TensorRT-LLM source directory.
COPY llama.sh ${SOURCE_DIR}/
# Overwrite two modules of the installed tensorrt_llm package with the copies
# kept under patches/.
# Error message recorded by the original author for this workaround:
#   name '_device_get_memory_info_fn' is not defined
# NOTE(review): presumably that error is what the replacement profiler.py fixes;
# the reason for replacing convert_utils.py is not stated here -- confirm.
# NOTE(review): the destinations hard-code python3.10/dist-packages, so they
# only match a base image whose tensorrt_llm is installed under that exact
# path -- verify when BASE_IMAGE changes.
COPY patches/profiler.py /usr/local/lib/python3.10/dist-packages/tensorrt_llm/profiler.py
COPY patches/convert_utils.py /usr/local/lib/python3.10/dist-packages/tensorrt_llm/models/convert_utils.py