diff --git a/06_gpu_and_ml/vllm_generic.py b/06_gpu_and_ml/vllm_generic.py
index 96090d182..c4d1af34b 100644
--- a/06_gpu_and_ml/vllm_generic.py
+++ b/06_gpu_and_ml/vllm_generic.py
@@ -4,8 +4,7 @@
 import time
 
-from modal import Image, Stub, method, Secret
-
+from modal import Image, Stub, method
 
 vllm_image = (
     Image.from_registry(
@@ -25,9 +24,9 @@
 )
 
 class Model:
     def __init__(self, model_name: str):
+        import torch
         from vllm.engine.arg_utils import AsyncEngineArgs
         from vllm.engine.async_llm_engine import AsyncLLMEngine
-        import torch
 
         n_gpus = torch.cuda.device_count()
diff --git a/06_gpu_and_ml/vllm_generic_client.py b/06_gpu_and_ml/vllm_generic_client.py
index 6aec221d5..a89c9cd64 100644
--- a/06_gpu_and_ml/vllm_generic_client.py
+++ b/06_gpu_and_ml/vllm_generic_client.py
@@ -15,14 +15,12 @@
     secrets=[hf_secret],
     gpu=modal.gpu.A100(memory=40),
     volumes={"/hf-cache": model_vol},
-    allow_background_volume_commits=True,
 )
 
 Model_80GB = Model.with_options(
     secrets=[hf_secret],
     gpu=modal.gpu.A100(memory=80),
     volumes={"/hf-cache": model_vol},
-    allow_background_volume_commits=True,
 )
 
 mistral7b = Model_40GB(model_name="mistralai/Mistral-7B-Instruct-v0.2")