From 4acaab0c3b8534e9443203e610edc2ac51541ec3 Mon Sep 17 00:00:00 2001
From: Richard Gong
Date: Fri, 15 Dec 2023 00:01:11 +0000
Subject: [PATCH] remove vllm hash

---
 06_gpu_and_ml/vllm_mixtral.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/06_gpu_and_ml/vllm_mixtral.py b/06_gpu_and_ml/vllm_mixtral.py
index 4021726d0..6f4f3d5b6 100644
--- a/06_gpu_and_ml/vllm_mixtral.py
+++ b/06_gpu_and_ml/vllm_mixtral.py
@@ -56,8 +56,6 @@ def download_model_to_folder():
 # run_function to run the function defined above to ensure the weights of
 # the model are saved within the container image.
 
-VLLM_HASH = "89523c8293bc02a4dfaaa80079a5347dc3952464a33a501d5de329921eea7ec7"
-
 image = (
     Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
     .pip_install("vllm==0.2.5", "huggingface_hub==0.19.4", "hf-transfer==0.1.4")
@@ -65,7 +63,7 @@ def download_model_to_folder():
     .run_function(download_model_to_folder, timeout=60 * 20)
 )
 
-stub = Stub("example-vllm-inference", image=image)
+stub = Stub("example-vllm-mixtral", image=image)
 
 # ## The model class
 
@@ -131,7 +129,7 @@ async def completion_stream(self, user_question):
         )
         index, num_tokens = 0, 0
         async for output in result_generator:
-            if "\ufffd" == output.outputs[0].text[-1]:
+            if output.outputs[0].text and "\ufffd" == output.outputs[0].text[-1]:
                 continue
             text_delta = output.outputs[0].text[index:]
             index = len(output.outputs[0].text)
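
For context on the last hunk, here is a minimal, self-contained sketch (not part of the commit) of the condition it introduces. It assumes, as the hunk suggests, that a streamed vLLM output's accumulated text can be an empty string, in which case the old `text[-1]` index would raise IndexError; `should_skip` is a hypothetical helper name used only for illustration.

# Sketch of the patched guard from completion_stream.
def should_skip(text: str) -> bool:
    # Skip this chunk while the latest character is the Unicode
    # replacement character U+FFFD (a multi-byte token that has not
    # finished decoding), but only when there is any text at all,
    # so an empty string no longer crashes on text[-1].
    return bool(text) and text[-1] == "\ufffd"

assert should_skip("") is False              # empty text: no IndexError, no skip
assert should_skip("partial\ufffd") is True  # incomplete decode: hold the chunk back
assert should_skip("hello") is False         # complete text streams through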