Skip to content

Commit

Permalink
Fix mlc_inference.py model_lib_path, upgrade to CUDA 12.2.2 (#489)
Browse files: browse the repository at this point in the history
  • Loading branch information
Rick authored Nov 6, 2023
1 parent 46d09cf commit 5539fd4
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions 06_gpu_and_ml/mlc_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,19 @@
LLAMA_MODEL_SIZE: str = "13b"

# Define the image and [Modal Stub](https://modal.com/docs/reference/modal.Stub#modalstub).
# We use an [official NVIDIA CUDA 12.1 image](https://hub.docker.com/r/nvidia/cuda)
# We use an [official NVIDIA CUDA 12.2 image](https://hub.docker.com/r/nvidia/cuda)
# to match MLC CUDA requirements.
image = (
modal.Image.from_registry(
"nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04",
"nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04",
add_python="3.11",
).run_commands(
"apt-get update",
"apt-get install -y curl git",
# Install git lfs
"curl -sSf https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash",
"apt-get install -y git-lfs",
"pip3 install --pre --force-reinstall mlc-ai-nightly-cu121 mlc-chat-nightly-cu121 -f https://mlc.ai/wheels",
"pip3 install --pre --force-reinstall mlc-ai-nightly-cu122 mlc-chat-nightly-cu122 -f https://mlc.ai/wheels",
)
# "These commands will download many prebuilt libraries as well as the chat
# configuration for Llama-2-7b that mlc_chat needs" [...]
Expand Down Expand Up @@ -105,7 +105,7 @@ def stopped_callback(self):

cm = ChatModule(
model=f"/dist/prebuilt/mlc-chat-Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1",
lib_path=f"/dist/prebuilt/lib/Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1-cuda.so",
model_lib_path=f"/dist/prebuilt/lib/Llama-2-{LLAMA_MODEL_SIZE}-chat-hf-q4f16_1-cuda.so",
)
queue_callback = QueueCallback(callback_interval=1)

Expand Down

0 comments on commit 5539fd4

Please sign in to comment.