From c353752c5159b2f2920a202f287a2e7dff15692f Mon Sep 17 00:00:00 2001
From: Akshat Bubna
Date: Sun, 24 Sep 2023 21:15:27 +0000
Subject: [PATCH] fix vLLM example (make dirs)

---
 06_gpu_and_ml/vllm_inference.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/06_gpu_and_ml/vllm_inference.py b/06_gpu_and_ml/vllm_inference.py
index 3f7355dab..435d99e7c 100644
--- a/06_gpu_and_ml/vllm_inference.py
+++ b/06_gpu_and_ml/vllm_inference.py
@@ -22,6 +22,7 @@
 
 from modal import Image, Secret, Stub, method
 
+MODEL_DIR = "/model"
 
 # ## Define a container image
 #
@@ -45,15 +46,15 @@
 def download_model_to_folder():
     from huggingface_hub import snapshot_download
 
+    os.makedirs(MODEL_DIR, exist_ok=True)
+
     snapshot_download(
         "meta-llama/Llama-2-13b-chat-hf",
-        local_dir="/model",
+        local_dir=MODEL_DIR,
         token=os.environ["HUGGINGFACE_TOKEN"],
     )
 
-MODEL_DIR = "/model"
-
 # ### Image definition
 # We’ll start from a Dockerhub image recommended by `vLLM`, upgrade the older
 # version of `torch` to a new one specifically built for CUDA 11.8. Next, we install `vLLM` from source to get the latest updates.