Fix mlc_inference.py model_lib_path, upgrade to CUDA 12.2.2 (#489)

modal-labs · Nov 6, 2023 · 5539fd4
1 parent 46d09cf
commit 5539fd4
Showing 1 changed file with 4 additions and 4 deletions.
diff --git a/06_gpu_and_ml/mlc_inference.py b/06_gpu_and_ml/mlc_inference.py
@@ -25,19 +25,19 @@
 LLAMA_MODEL_SIZE: str = "13b"
 
 # Define the image and [Modal Stub](https://modal.com/docs/reference/modal.Stub#modalstub).
-# We use an [official NVIDIA CUDA 12.1 image](https://hub.docker.com/r/nvidia/cuda)
+# We use an [official NVIDIA CUDA 12.2 image](https://hub.docker.com/r/nvidia/cuda)
 # to match MLC CUDA requirements.
 image = (
     modal.Image.from_registry(
-        "nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04",
+        "nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04",
         add_python="3.11",
     ).run_commands(
         "apt-get update",
         "apt-get install -y curl git",
         # Install git lfs
         "curl -sSf https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash",
         "apt-get install -y git-lfs",
-        "pip3 install --pre --force-reinstall mlc-ai-nightly-cu121 mlc-chat-nightly-cu121 -f https://mlc.ai/wheels",
+        "pip3 install --pre --force-reinstall mlc-ai-nightly-cu122 mlc-chat-nightly-cu122 -f https://mlc.ai/wheels",
     )
     # "These commands will download many prebuilt libraries as well as the chat
     # configuration for Llama-2-7b that mlc_chat needs" [...]
@@ -105,7 +105,7 @@ def stopped_callback(self):
 
     cm = ChatModule(
         model=f"/dist/prebuilt/mlc-chat-Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1",
-        lib_path=f"/dist/prebuilt/lib/Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1-cuda.so",
+        model_lib_path=f"/dist/prebuilt/lib/Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1-cuda.so",
     )
     queue_callback = QueueCallback(callback_interval=1)