From 4676bf2f3018e4205be9cc3f22dd1fb6bff34d9a Mon Sep 17 00:00:00 2001
From: Atinoda <61033436+Atinoda@users.noreply.github.com>
Date: Fri, 26 Jul 2024 16:55:44 +0100
Subject: [PATCH] Add no AVX2 variant for `default-nvidia`

---
 Dockerfile | 24 ++++++++++++++++++++++++
 README.md  |  1 +
 2 files changed, 25 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 4e049a5..4a74269 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -48,6 +48,21 @@ FROM app_nvidia AS app_nvidia_x
 RUN chmod +x /scripts/build_extensions.sh && \
     . /scripts/build_extensions.sh
 
+# Base No AVX2 (CUDA build for CPUs that lack AVX2 instructions)
+FROM app_base AS app_nvidia_noavx2
+# Install pytorch for CUDA 12.1
+RUN pip3 install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 \
+    --index-url https://download.pytorch.org/whl/cu121
+# Install oobabooga/text-generation-webui using its no-AVX2 requirements file
+RUN ls /app
+RUN pip3 install -r /app/requirements_noavx2.txt
+
+# Extended No AVX2 (builds on app_nvidia_noavx2, NOT the AVX2 app_nvidia_x stage)
+FROM app_nvidia_noavx2 AS app_nvidia_noavx2_x
+# Install extensions into the no-AVX2 environment
+RUN chmod +x /scripts/build_extensions.sh && \
+    . /scripts/build_extensions.sh
+
 # ROCM [Untested. Widen your hardware support, AMD!]
 
 # Base
@@ -159,6 +174,15 @@ RUN echo "Nvidia Extended" > /variant.txt
 ENV EXTRA_LAUNCH_ARGS=""
 CMD ["python3", "/app/server.py"]
 
+# Extended without AVX2
+FROM run_base AS default-nvidia-noavx2
+# Copy venv from the no-AVX2 extended build stage
+COPY --from=app_nvidia_noavx2_x $VIRTUAL_ENV $VIRTUAL_ENV
+# Variant parameters
+RUN echo "Nvidia Extended (No AVX2)" > /variant.txt
+ENV EXTRA_LAUNCH_ARGS=""
+CMD ["python3", "/app/server.py"]
+
 # ROCM
 
 # Base
diff --git a/README.md b/README.md
index 53ce769..439bde7 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ Choose the desired variant by setting the image `:tag` in `docker-compose.yml` u
 | Platform | Description |
 |---|---|
 | `*-nvidia` | CUDA 12.1 inference acceleration. |
+| `*-nvidia-noavx2` | CUDA 12.1 inference acceleration with no AVX2 CPU instructions. *Typical use-case is legacy CPU with modern GPU.* |
 | `*-cpu` | CPU-only inference. *Has become surprisingly fast since the early days!* |
 | `*-rocm` | ROCM 5.6 inference acceleration. *Experimental and unstable.* |
 | `*-arc` | Intel Arc XPU and oneAPI inference acceleration. **Not compatible with Intel integrated GPU (iGPU).** *Experimental and unstable.* |