Commit 722a30f: split extra/core images
Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler committed Oct 18, 2023
1 parent 60be47a commit 722a30f
Showing 4 changed files with 62 additions and 40 deletions.
86 changes: 56 additions & 30 deletions Dockerfile
@@ -1,6 +1,9 @@
ARG GO_VERSION=1.21-bullseye
ARG IMAGE_TYPE=extras
# extras or core

FROM golang:$GO_VERSION as requirements

FROM golang:$GO_VERSION as requirements-core

ARG BUILD_TYPE
ARG CUDA_MAJOR_VERSION=11
@@ -35,24 +38,6 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
; fi
ENV PATH /usr/local/cuda/bin:${PATH}

# Extras requirements
COPY extra/requirements.txt /build/extra/requirements.txt
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
fi
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
fi
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt

# Vall-e-X
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt

WORKDIR /build

# OpenBLAS requirements
RUN apt-get install -y libopenblas-dev

@@ -61,6 +46,8 @@ RUN apt-get install -y libopencv-dev && \
ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2


WORKDIR /build

# piper requirements
# Use pre-compiled Piper phonemization library (includes onnxruntime)
#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
@@ -80,17 +67,40 @@ RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSIO
tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ && \
rm spdlog-${SPDLOG_VERSION} -rf && \
rm /build/lib/Linux-$(uname -m)/piper_phonemize -rf

# Extras requirements
FROM requirements-core as requirements-extras

COPY extra/requirements.txt /build/extra/requirements.txt
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
RUN if [ "${TARGETARCH}" = "amd64" ]; then \
pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
fi
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
fi
RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt

# Vall-e-X
RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt

# \
# ; fi

###################################
###################################

FROM requirements as builder
FROM requirements-${IMAGE_TYPE} as builder

ARG GO_TAGS="stablediffusion tts"

ARG GRPC_BACKENDS
ARG BUILD_GRPC=true
ENV GRPC_BACKENDS=${GRPC_BACKENDS}
ENV GO_TAGS=${GO_TAGS}
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
Expand All @@ -108,18 +118,20 @@ COPY .git .
# stablediffusion does not tolerate a newer version of abseil, build it first
RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build

RUN git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
RUN if [ "${BUILD_GRPC}" = "true" ]; then \
git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-DgRPC_BUILD_TESTS=OFF \
../.. && make -j12 install && rm -rf grpc
../.. && make -j12 install && rm -rf grpc \
; fi

# Rebuild with defaults backends
RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build

###################################
###################################

FROM requirements
FROM requirements-${IMAGE_TYPE}

ARG FFMPEG
ARG BUILD_TYPE
@@ -129,6 +141,11 @@ ENV BUILD_TYPE=${BUILD_TYPE}
ENV REBUILD=false
ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz

ARG CUDA_MAJOR_VERSION=11
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
ENV NVIDIA_VISIBLE_DEVICES=all

# Add FFmpeg
RUN if [ "${FFMPEG}" = "true" ]; then \
apt-get install -y ffmpeg \
@@ -146,16 +163,25 @@ RUN make prepare-sources
# Copy the binary
COPY --from=builder /build/local-ai ./

# do not let piper rebuild (requires an older version of absl)
COPY --from=builder /build/backend-assets/grpc/piper ./backend-assets/grpc/piper
# do not let stablediffusion rebuild (requires an older version of absl)
COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion

# Copy VALLE-X as it's not a real "lib"
RUN cp -rfv /usr/lib/vall-e-x/* ./

# To resolve exllama import error
RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
cp -rfv /usr/lib/vall-e-x/* ./ ; \
if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
fi ;\
fi
# Copy VALLE-X as it's not a real "lib"

#RUN cp -rfv /usr/lib/vall-e-x/* ./

# To resolve exllama import error
#RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
# cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
# fi
# Define the health check command
HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
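
With this split, the image variant is selected at build time: IMAGE_TYPE chooses which requirements stage the builder and final stages derive from, and BUILD_GRPC=false skips the in-image gRPC source build. A minimal sketch of the two builds (the localai:* tags are illustrative, and the GRPC_BACKENDS value is only an example subset taken from this Dockerfile):

  # Extras image (the default): includes the extra Python backends
  docker build --build-arg IMAGE_TYPE=extras -t localai:extras .

  # Core image: skip the in-image gRPC source build, mirroring the
  # Makefile's prepare-e2e target below (GRPC_BACKENDS shown here is
  # only an illustrative subset)
  docker build --build-arg IMAGE_TYPE=core \
    --build-arg BUILD_GRPC=false \
    --build-arg GRPC_BACKENDS=backend-assets/grpc/stablediffusion \
    -t localai:core .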
9 changes: 4 additions & 5 deletions Makefile
@@ -335,13 +335,12 @@ test: prepare test-models/testmodel grpcs
$(MAKE) test-stablediffusion

prepare-e2e:
wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(abspath ./tests/e2e-fixtures)/ggllm-test-model.bin
docker build --build-arg BUILD_TYPE=cublas --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
test -e $(abspath ./tests/e2e-fixtures)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(abspath ./tests/e2e-fixtures)/ggllm-test-model.bin
docker build --build-arg BUILD_GRPC=false --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .

run-e2e-image:
cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml models/
ls -liah $(abspath ./models)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(abspath ./models):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
ls -liah $(abspath ./tests/e2e-fixtures)
docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(abspath ./tests/e2e-fixtures):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests

test-e2e:
@echo 'Running e2e tests'
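
Taken together, the e2e targets can be driven in sequence; a sketch, assuming a CUDA-capable host and that BUILD_TYPE and GRPC_BACKENDS are set appropriately for it:

  # Download the fixture model (if missing) and build the core test image
  make BUILD_TYPE=cublas prepare-e2e
  # Start the container with the fixtures mounted as the models directory
  make run-e2e-image
  # Run the e2e suite against the running container
  make test-e2e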
1 change: 1 addition & 0 deletions tests/e2e-fixtures/gpu.yaml
@@ -10,6 +10,7 @@ mmap: true
parameters:
model: ggllm-test-model.bin
rope_freq_base: 10000
max_tokens: 20
rope_freq_scale: 1
temperature: 0.2
top_k: 40
6 changes: 1 addition & 5 deletions tests/e2e/e2e_test.go
@@ -37,17 +37,13 @@ var _ = Describe("E2E test", func() {
// Execute docker logs $$(docker ps -q --filter ancestor=localai-tests) as a command and check the output
cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
out, err := cmd.CombinedOutput()
Expect(err).ToNot(HaveOccurred())
Expect(err).ToNot(HaveOccurred(), string(out))
Expect(string(out)).To(ContainSubstring("found 1 CUDA devices"), string(out))
Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"), string(out))
})

Context("Generates text", func() {
It("streams chat tokens", func() {
// models, err := client.ListModels(context.TODO())
// Expect(err).ToNot(HaveOccurred())
// Expect(models.Models).ToNot(BeEmpty(), models.Models)

model := "gpt-4"
resp, err := client.CreateChatCompletion(context.TODO(),
openai.ChatCompletionRequest{
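
The same check the test performs can be reproduced by hand against a running container (assuming the localai-tests image name from the Makefile above):

  docker logs $(docker ps -q --filter ancestor=localai-tests) 2>&1 \
    | grep -E "found 1 CUDA devices|using CUDA for GPU acceleration"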
