Skip to content

Commit

Permalink
remove vllm hash
Browse files Browse the repository at this point in the history
  • Loading branch information
gongy committed Dec 15, 2023
1 parent 9b94d81 commit 2720850
Showing 1 changed file with 2 additions and 4 deletions.
6 changes: 2 additions & 4 deletions 06_gpu_and_ml/vllm_mixtral.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -56,16 +56,14 @@ def download_model_to_folder():
# run_function to run the function defined above to ensure the weights of
# the model are saved within the container image.

- VLLM_HASH = "89523c8293bc02a4dfaaa80079a5347dc3952464a33a501d5de329921eea7ec7"
-
image = (
Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
.pip_install("vllm==0.2.5", "huggingface_hub==0.19.4", "hf-transfer==0.1.4")
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.run_function(download_model_to_folder, timeout=60 * 20)
)

- stub = Stub("example-vllm-inference", image=image)
+ stub = Stub("example-vllm-mixtral", image=image)


# ## The model class
Expand Down Expand Up @@ -131,7 +129,7 @@ async def completion_stream(self, user_question):
)
index, num_tokens = 0, 0
async for output in result_generator:
- if "\ufffd" == output.outputs[0].text[-1]:
+ if output.outputs[0].text and "\ufffd" == output.outputs[0].text[-1]:
continue
text_delta = output.outputs[0].text[index:]
index = len(output.outputs[0].text)
Expand Down

0 comments on commit 2720850

Please sign in to comment.