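# WikidataChat inference image: NVIDIA CUDA base + Ollama + Python backend + Node frontend.
# Example build (image tag is illustrative; build args below can be overridden):
#   docker build -t wikidatachat --build-arg OLLAMA_MODEL_NAME=stablelm2:1.6b-zephyr .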
ARG CUDA_VERSION="11.8.0"
ARG CUDNN_VERSION="8"
ARG UBUNTU_VERSION="22.04"
ARG CUDA_FROM=nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION
ARG OLLAMA_VERSION="0.1.22"
ARG OLLAMA_FROM=ollama/ollama:$OLLAMA_VERSION
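# Multi-stage build: the Ollama image is used only as a source for its binary (copied below)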
FROM $OLLAMA_FROM AS ollama
# Base NVIDIA CUDA Ubuntu image
FROM $CUDA_FROM
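# Make the CUDA toolchain available on PATH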
ENV PATH="/usr/local/cuda/bin:${PATH}"
# Install essential packages from the Ubuntu repositories
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends openssh-server openssh-client git git-lfs && \
    apt-get install -y curl python3 python3-pip python3-venv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install Node.js from upstream (NodeSource); the Ubuntu packages are too old
RUN curl -sL https://deb.nodesource.com/setup_18.x | bash
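# The NodeSource setup script adds the apt repository and refreshes the package lists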
RUN apt-get install -y nodejs && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Install the Yarn package manager for the frontend build
RUN npm install -g yarn
# Install the Ollama LLM inference engine (binary copied from the ollama stage above)
COPY --from=ollama /usr/bin/ollama /usr/local/ollama/bin/ollama
ENV PATH="/usr/local/ollama/bin:${PATH}"
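# Application defaults (presumably read by the backend); override with docker run -e if needed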
ENV FRONTEND_STATIC_DIR='./frontend/dist'
ENV EMBEDDING_MODEL='svalabs/german-gpl-adapted-covid'
# Set up the app in /workspace
WORKDIR /workspace
# Install backend dependencies
COPY --chmod=755 requirements.txt requirements.txt
RUN pip install -r requirements.txt
# Pull a language model (see LICENSE_STABLELM2.txt)
# ARG OLLAMA_MODEL_NAME=openchat
ARG OLLAMA_MODEL_NAME=stablelm2:1.6b-zephyr
ARG OLLAMA_URL=http://localhost:11434
ENV OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME}
ENV OLLAMA_URL=${OLLAMA_URL}
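# The Ollama server must be running before a model can be pulled: start it in the
# background, wait until the API responds, then pull so the weights are baked into this layer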
RUN ollama serve & while ! curl -s ${OLLAMA_URL} > /dev/null; do sleep 1; done; ollama pull ${OLLAMA_MODEL_NAME}
# Load the sentence-transformers embedding model once so it is cached in the image
# (reuses the EMBEDDING_MODEL env var defined above)
RUN python3 -c "from haystack.components.embedders import SentenceTransformersDocumentEmbedder; SentenceTransformersDocumentEmbedder(model='${EMBEDDING_MODEL}').warm_up()"
# Install frontend dependencies
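# Copying only the manifest and lockfile first lets Docker cache the yarn install layer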
COPY --chmod=755 frontend/package.json frontend/package.json
COPY --chmod=755 frontend/yarn.lock frontend/yarn.lock
RUN cd frontend && yarn install
# Copy the JSON input data
COPY --chmod=755 json_input json_input
# Copy backend for production
COPY --chmod=755 wikidatachat wikidatachat
# Copy and build frontend for production (into the frontend/dist folder)
COPY --chmod=755 frontend frontend
RUN cd frontend && yarn build
# Container startup script
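# start.sh is expected to launch ollama serve and the application servers (script not shown here)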
COPY --chmod=755 start.sh /start.sh
CMD [ "/start.sh" ]