From 10411dd124a84742eb166820b38084fc52f4f0ee Mon Sep 17 00:00:00 2001 From: Kenny Ning Date: Fri, 2 Feb 2024 18:04:36 -0500 Subject: [PATCH 1/3] pin pytorch --- 06_gpu_and_ml/vllm_inference.py | 9 ++++++++- 06_gpu_and_ml/vllm_mixtral.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/06_gpu_and_ml/vllm_inference.py b/06_gpu_and_ml/vllm_inference.py index e3554e062..1138c3793 100644 --- a/06_gpu_and_ml/vllm_inference.py +++ b/06_gpu_and_ml/vllm_inference.py @@ -62,7 +62,14 @@ def download_model_to_folder(): Image.from_registry( "nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10" ) - .pip_install("vllm==0.2.5", "huggingface_hub==0.19.4", "hf-transfer==0.1.4") + .pip_install( + "vllm==0.2.5", + "huggingface_hub==0.19.4", + "hf-transfer==0.1.4", + "torch==2.1.2", + "torchvision==0.16.2", + "torchaudio==2.1.2" + ) # Use the barebones hf-transfer package for maximum download speeds. No progress bar, but expect 700MB/s. .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) .run_function( diff --git a/06_gpu_and_ml/vllm_mixtral.py b/06_gpu_and_ml/vllm_mixtral.py index f28caa750..3fef9b5a5 100644 --- a/06_gpu_and_ml/vllm_mixtral.py +++ b/06_gpu_and_ml/vllm_mixtral.py @@ -60,7 +60,14 @@ def download_model_to_folder(): Image.from_registry( "nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10" ) - .pip_install("vllm==0.2.5", "huggingface_hub==0.19.4", "hf-transfer==0.1.4") + .pip_install( + "vllm==0.2.5", + "huggingface_hub==0.19.4", + "hf-transfer==0.1.4", + "torch==2.1.2", + "torchvision==0.16.2", + "torchaudio==2.1.2" + ) .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) .run_function(download_model_to_folder, timeout=60 * 20) ) From 4eb95455ba770b7cd429533afc353d6dee28beab Mon Sep 17 00:00:00 2001 From: Kenny Ning Date: Fri, 2 Feb 2024 18:36:16 -0500 Subject: [PATCH 2/3] fix formatting and secrets argument --- 06_gpu_and_ml/vllm_inference.py | 2 +- 06_gpu_and_ml/vllm_mixtral.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/06_gpu_and_ml/vllm_inference.py b/06_gpu_and_ml/vllm_inference.py index 1138c3793..3e216f53c 100644 --- a/06_gpu_and_ml/vllm_inference.py +++ b/06_gpu_and_ml/vllm_inference.py @@ -68,7 +68,7 @@ def download_model_to_folder(): "hf-transfer==0.1.4", "torch==2.1.2", "torchvision==0.16.2", - "torchaudio==2.1.2" + "torchaudio==2.1.2", ) # Use the barebones hf-transfer package for maximum download speeds. No progress bar, but expect 700MB/s. .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) diff --git a/06_gpu_and_ml/vllm_mixtral.py b/06_gpu_and_ml/vllm_mixtral.py index 3fef9b5a5..aa128a01f 100644 --- a/06_gpu_and_ml/vllm_mixtral.py +++ b/06_gpu_and_ml/vllm_mixtral.py @@ -66,7 +66,7 @@ def download_model_to_folder(): "hf-transfer==0.1.4", "torch==2.1.2", "torchvision==0.16.2", - "torchaudio==2.1.2" + "torchaudio==2.1.2", ) .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) .run_function(download_model_to_folder, timeout=60 * 20) From ba078e59cb99a621b97391f7a0c7c40a09bdf0e4 Mon Sep 17 00:00:00 2001 From: Akshat Bubna Date: Sat, 3 Feb 2024 02:49:40 +0000 Subject: [PATCH 3/3] remove torchvision and audio --- 06_gpu_and_ml/vllm_inference.py | 2 -- 06_gpu_and_ml/vllm_mixtral.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/06_gpu_and_ml/vllm_inference.py b/06_gpu_and_ml/vllm_inference.py index 3e216f53c..875ed7909 100644 --- a/06_gpu_and_ml/vllm_inference.py +++ b/06_gpu_and_ml/vllm_inference.py @@ -67,8 +67,6 @@ def download_model_to_folder(): "huggingface_hub==0.19.4", "hf-transfer==0.1.4", "torch==2.1.2", - "torchvision==0.16.2", - "torchaudio==2.1.2", ) # Use the barebones hf-transfer package for maximum download speeds. No progress bar, but expect 700MB/s. .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) diff --git a/06_gpu_and_ml/vllm_mixtral.py b/06_gpu_and_ml/vllm_mixtral.py index aa128a01f..456b818fe 100644 --- a/06_gpu_and_ml/vllm_mixtral.py +++ b/06_gpu_and_ml/vllm_mixtral.py @@ -65,8 +65,6 @@ def download_model_to_folder(): "huggingface_hub==0.19.4", "hf-transfer==0.1.4", "torch==2.1.2", - "torchvision==0.16.2", - "torchaudio==2.1.2", ) .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) .run_function(download_model_to_folder, timeout=60 * 20)