From 2d732a994074b9cda4e7648c7f043f769eb8986b Mon Sep 17 00:00:00 2001
From: Akshat Bubna
Date: Sun, 24 Sep 2023 21:15:27 +0000
Subject: [PATCH] fix vLLM example (make dirs)

---
 06_gpu_and_ml/vllm_inference.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/06_gpu_and_ml/vllm_inference.py b/06_gpu_and_ml/vllm_inference.py
index 3f7355dab..a8e8e442a 100644
--- a/06_gpu_and_ml/vllm_inference.py
+++ b/06_gpu_and_ml/vllm_inference.py
@@ -22,6 +22,8 @@
 
 from modal import Image, Secret, Stub, method
 
+MODEL_DIR = "/model"
+
 # ## Define a container image
 #
 
@@ -45,15 +47,15 @@ def download_model_to_folder():
     from huggingface_hub import snapshot_download
 
+    os.makedirs(MODEL_DIR, exist_ok=True)
+
     snapshot_download(
         "meta-llama/Llama-2-13b-chat-hf",
-        local_dir="/model",
+        local_dir=MODEL_DIR,
         token=os.environ["HUGGINGFACE_TOKEN"],
     )
 
 
-MODEL_DIR = "/model"
-
 # ### Image definition
 # We’ll start from a Dockerhub image recommended by `vLLM`, upgrade the older
 # version of `torch` to a new one specifically built for CUDA 11.8. Next, we install `vLLM` from source to get the latest updates.