From 4acaab0c3b8534e9443203e610edc2ac51541ec3 Mon Sep 17 00:00:00 2001
From: Richard Gong
Date: Fri, 15 Dec 2023 00:01:11 +0000
Subject: [PATCH] remove vllm hash

---
 06_gpu_and_ml/vllm_mixtral.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/06_gpu_and_ml/vllm_mixtral.py b/06_gpu_and_ml/vllm_mixtral.py
index 4021726d0..6f4f3d5b6 100644
--- a/06_gpu_and_ml/vllm_mixtral.py
+++ b/06_gpu_and_ml/vllm_mixtral.py
@@ -56,8 +56,6 @@ def download_model_to_folder():
 # run_function to run the function defined above to ensure the weights of
 # the model are saved within the container image.
 
-VLLM_HASH = "89523c8293bc02a4dfaaa80079a5347dc3952464a33a501d5de329921eea7ec7"
-
 image = (
     Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
     .pip_install("vllm==0.2.5", "huggingface_hub==0.19.4", "hf-transfer==0.1.4")
@@ -65,7 +63,7 @@ def download_model_to_folder():
     .run_function(download_model_to_folder, timeout=60 * 20)
 )
 
-stub = Stub("example-vllm-inference", image=image)
+stub = Stub("example-vllm-mixtral", image=image)
 
 # ## The model class
 
@@ -131,7 +129,7 @@ async def completion_stream(self, user_question):
         )
         index, num_tokens = 0, 0
         async for output in result_generator:
-            if "\ufffd" == output.outputs[0].text[-1]:
+            if output.outputs[0].text and "\ufffd" == output.outputs[0].text[-1]:
                 continue
             text_delta = output.outputs[0].text[index:]
             index = len(output.outputs[0].text)
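
For context on the last hunk, here is a minimal, self-contained sketch (not part of the commit) of the condition it introduces. It assumes, as the hunk suggests, that a streamed vLLM output's accumulated text can be an empty string, in which case the old `text[-1]` index would raise IndexError; `should_skip` is a hypothetical helper name used only for illustration.

# Sketch of the patched guard from completion_stream.
def should_skip(text: str) -> bool:
    # Skip this chunk while the latest character is the Unicode
    # replacement character U+FFFD (a multi-byte token that has not
    # finished decoding), but only when there is any text at all,
    # so an empty string no longer crashes on text[-1].
    return bool(text) and text[-1] == "\ufffd"

assert should_skip("") is False              # empty text: no IndexError, no skip
assert should_skip("partial\ufffd") is True  # incomplete decode: hold the chunk back
assert should_skip("hello") is False         # complete text streams through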