Bump vLLM Mixtral example to 0.2.6
gongy committed Jan 29, 2024
1 parent 55497df commit ec5e8ea
Showing 1 changed file with 2 additions and 11 deletions.
13 changes: 2 additions & 11 deletions 06_gpu_and_ml/vllm_mixtral.py
@@ -60,7 +60,7 @@ def download_model_to_folder():
     Image.from_registry(
         "nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10"
     )
-    .pip_install("vllm==0.2.5", "huggingface_hub==0.19.4", "hf-transfer==0.1.4")
+    .pip_install("vllm==0.2.6", "huggingface_hub==0.19.4", "hf-transfer==0.1.4")
     .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
     .run_function(download_model_to_folder, timeout=60 * 20)
 )
@@ -74,9 +74,7 @@ def download_model_to_folder():
 # This enables us to load the model into memory just once every time a container starts up, and keep it cached
 # on the GPU for each subsequent invocation of the function.
 #
-# The `vLLM` library allows the code to remain quite clean. There are, however, some
-# outstanding issues and performance improvements that we patch here, such as multi-GPU setup and
-# suboptimal Ray CPU pinning.
+# The `vLLM` library allows the code to remain quite clean. We do have to patch the multi-GPU setup due to issues with Ray.
 @stub.cls(
     gpu=GPU_CONFIG,
     timeout=60 * 10,
@@ -105,13 +103,6 @@ def __enter__(self):
         self.engine = AsyncLLMEngine.from_engine_args(engine_args)
         self.template = "<s> [INST] {user} [/INST] "
 
-        # Performance improvement from https://github.com/vllm-project/vllm/issues/2073#issuecomment-1853422529
-        if GPU_CONFIG.count > 1:
-            import subprocess
-
-            RAY_CORE_PIN_OVERRIDE = "cpuid=0 ; for pid in $(ps xo '%p %c' | grep ray:: | awk '{print $1;}') ; do taskset -cp $cpuid $pid ; cpuid=$(($cpuid + 1)) ; done"
-            subprocess.call(RAY_CORE_PIN_OVERRIDE, shell=True)
-
     @method()
     async def completion_stream(self, user_question):
         from vllm import SamplingParams
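For context, here is a minimal sketch of the pattern this commit leaves in place: vLLM pinned to 0.2.6 in the container image, and the engine built once per container start in __enter__ and reused by every completion_stream call, with the Ray CPU-pinning workaround no longer applied. The GPU config, model path, class name, and engine/sampling arguments below are illustrative assumptions, and the model-download step is omitted; see the full 06_gpu_and_ml/vllm_mixtral.py for the real values.

# Sketch only; names and parameters are assumptions, not the exact example code.
from modal import Image, Stub, gpu, method

GPU_CONFIG = gpu.A100(memory=80, count=2)  # assumed GPU setup
MODEL_DIR = "/model"                       # assumed model location inside the image

stub = Stub("example-vllm-mixtral")

vllm_image = (
    Image.from_registry("nvidia/cuda:12.1.0-base-ubuntu22.04", add_python="3.10")
    .pip_install("vllm==0.2.6", "huggingface_hub==0.19.4", "hf-transfer==0.1.4")
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
    # The real example also bakes the model weights into the image here.
)

@stub.cls(gpu=GPU_CONFIG, timeout=60 * 10, image=vllm_image)
class Model:
    def __enter__(self):
        from vllm.engine.arg_utils import AsyncEngineArgs
        from vllm.engine.async_llm_engine import AsyncLLMEngine

        # Runs once per container start; the engine stays cached on the GPU
        # for every subsequent invocation handled by this container.
        engine_args = AsyncEngineArgs(
            model=MODEL_DIR, tensor_parallel_size=GPU_CONFIG.count
        )
        self.engine = AsyncLLMEngine.from_engine_args(engine_args)
        self.template = "<s> [INST] {user} [/INST] "
        # Note: the Ray CPU-pinning workaround removed by this commit is
        # intentionally absent here.

    @method()
    async def completion_stream(self, user_question):
        from vllm import SamplingParams
        from vllm.utils import random_uuid

        sampling_params = SamplingParams(temperature=0.75, max_tokens=128)
        results = self.engine.generate(
            self.template.format(user=user_question),
            sampling_params,
            random_uuid(),
        )
        async for output in results:
            # Yields the text generated so far for the request.
            yield output.outputs[0].text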
