From 5539fd42989b6ebc7d29f1900764e963db2c3957 Mon Sep 17 00:00:00 2001
From: Rick
Date: Sun, 5 Nov 2023 21:53:28 -0600
Subject: [PATCH] Fix mlc_inference.py model_lib_path, upgrade to CUDA 12.2.2 (#489)

---
 06_gpu_and_ml/mlc_inference.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/06_gpu_and_ml/mlc_inference.py b/06_gpu_and_ml/mlc_inference.py
index a587ba3f8..7a616c7c0 100644
--- a/06_gpu_and_ml/mlc_inference.py
+++ b/06_gpu_and_ml/mlc_inference.py
@@ -25,11 +25,11 @@ LLAMA_MODEL_SIZE: str = "13b"
 
 # Define the image and [Modal Stub](https://modal.com/docs/reference/modal.Stub#modalstub).
-# We use an [official NVIDIA CUDA 12.1 image](https://hub.docker.com/r/nvidia/cuda)
+# We use an [official NVIDIA CUDA 12.2 image](https://hub.docker.com/r/nvidia/cuda)
 # to match MLC CUDA requirements.
 image = (
     modal.Image.from_registry(
-        "nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04",
+        "nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04",
         add_python="3.11",
     ).run_commands(
         "apt-get update",
@@ -37,7 +37,7 @@
         # Install git lfs
         "curl -sSf https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash",
         "apt-get install -y git-lfs",
-        "pip3 install --pre --force-reinstall mlc-ai-nightly-cu121 mlc-chat-nightly-cu121 -f https://mlc.ai/wheels",
+        "pip3 install --pre --force-reinstall mlc-ai-nightly-cu122 mlc-chat-nightly-cu122 -f https://mlc.ai/wheels",
     )
     # "These commands will download many prebuilt libraries as well as the chat
     # configuration for Llama-2-7b that mlc_chat needs" [...]
@@ -105,7 +105,7 @@ def stopped_callback(self):
 
     cm = ChatModule(
         model=f"/dist/prebuilt/mlc-chat-Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1",
-        lib_path=f"/dist/prebuilt/lib/Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1-cuda.so",
+        model_lib_path=f"/dist/prebuilt/lib/Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1-cuda.so",
     )
     queue_callback = QueueCallback(callback_interval=1)