From 4676bf2f3018e4205be9cc3f22dd1fb6bff34d9a Mon Sep 17 00:00:00 2001
From: Atinoda <61033436+Atinoda@users.noreply.github.com>
Date: Fri, 26 Jul 2024 16:55:44 +0100
Subject: [PATCH] Add no AVX2 variant for `default-nvidia`

---
 Dockerfile | 24 ++++++++++++++++++++++++
 README.md  |  1 +
 2 files changed, 25 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 4e049a5..4a74269 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -48,6 +48,21 @@ FROM app_nvidia AS app_nvidia_x
 RUN chmod +x /scripts/build_extensions.sh && \
     . /scripts/build_extensions.sh
 
+# Nvidia base stage for CPUs without AVX2 support
+FROM app_base AS app_nvidia_noavx2
+# PyTorch 2.2.2 wheels are pulled from the CUDA 12.1 package index
+RUN pip3 install --index-url https://download.pytorch.org/whl/cu121 \
+    torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2
+# List /app, then install the text-generation-webui no-AVX2 requirements
+RUN ls /app
+RUN pip3 install -r /app/requirements_noavx2.txt
+
+# Extended No AVX2 (must build on the No AVX2 base, not the regular extended stage)
+FROM app_nvidia_noavx2 AS app_nvidia_noavx2_x
+# Make the extension build script executable and source it to install extensions
+RUN chmod +x /scripts/build_extensions.sh && \
+    . /scripts/build_extensions.sh
+
 
 # ROCM [Untested. Widen your hardware support, AMD!]
 # Base
@@ -159,6 +174,15 @@ RUN echo "Nvidia Extended" > /variant.txt
 ENV EXTRA_LAUNCH_ARGS=""
 CMD ["python3", "/app/server.py"]
 
+# Nvidia extended image for CPUs without AVX2
+FROM run_base AS default-nvidia-noavx2
+# Bring in the virtual environment prepared by the no-AVX2 extended build stage
+COPY --from=app_nvidia_noavx2_x ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+# Record the variant name; extra launch arguments are left empty
+RUN echo 'Nvidia Extended (No AVX2)' > /variant.txt
+ENV EXTRA_LAUNCH_ARGS=""
+CMD ["python3", "/app/server.py"]
+
 
 # ROCM
 # Base
diff --git a/README.md b/README.md
index 53ce769..439bde7 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,7 @@ Choose the desired variant by setting the image `:tag` in `docker-compose.yml` u
 | Platform | Description | 
 |---|---|
 | `*-nvidia` | CUDA 12.1 inference acceleration. |
+| `*-nvidia-noavx2` | CUDA 12.1 inference acceleration for CPUs without AVX2 support. *Typical use-case is a legacy CPU paired with a modern GPU.* |
 | `*-cpu` | CPU-only inference. *Has become surprisingly fast since the early days!* |
 | `*-rocm` | ROCM 5.6 inference acceleration. *Experimental and unstable.* |
 | `*-arc` | Intel Arc XPU and oneAPI inference acceleration.  **Not compatible with Intel integrated GPU (iGPU).** *Experimental and unstable.* |