Skip to content

Commit

Permalink
[Misc] Deprecation Warning when setting --engine-use-ray (vllm-projec…
Browse files Browse the repository at this point in the history
…t#7424)

Signed-off-by: Wallas Santos <[email protected]>
Co-authored-by: youkaichao <[email protected]>
Co-authored-by: Nick Hill <[email protected]>
Co-authored-by: youkaichao <[email protected]>
  • Loading branch information
4 people authored Aug 14, 2024
1 parent 67d115d commit 70b746e
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 3 deletions.
9 changes: 8 additions & 1 deletion tests/async_engine/test_api_server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import subprocess
import sys
import time
Expand Down Expand Up @@ -35,11 +36,17 @@ def api_server(tokenizer_pool_size: int, engine_use_ray: bool,
"127.0.0.1", "--tokenizer-pool-size",
str(tokenizer_pool_size)
]

# Copy the environment variables and append `VLLM_ALLOW_ENGINE_USE_RAY=1`
# to prevent `--engine-use-ray` raises an exception due to it deprecation
env_vars = os.environ.copy()
env_vars["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"

if engine_use_ray:
commands.append("--engine-use-ray")
if worker_use_ray:
commands.append("--worker-use-ray")
uvicorn_process = subprocess.Popen(commands)
uvicorn_process = subprocess.Popen(commands, env=env_vars)
yield
uvicorn_process.terminate()

Expand Down
6 changes: 6 additions & 0 deletions tests/async_engine/test_async_llm_engine.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import os
from dataclasses import dataclass

import pytest
Expand Down Expand Up @@ -106,11 +107,16 @@ async def test_new_requests_event():
assert engine.engine.add_request_calls == 3
assert engine.engine.step_calls == old_step_calls + 1

# Allow deprecated engine_use_ray to not raise exception
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"

engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
assert engine.get_model_config() is not None
assert engine.get_tokenizer() is not None
assert engine.get_decoding_config() is not None

os.environ.pop("VLLM_ALLOW_ENGINE_USE_RAY")


def test_asyncio_run():
wait_for_gpu_memory_to_clear(
Expand Down
6 changes: 5 additions & 1 deletion tests/async_engine/test_openapi_server_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ def server():
str(chatml_jinja_path),
]

with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
# Allow `--engine-use-ray`, otherwise the launch of the server throw
# an error due to try to use a deprecated feature
env_dict = {"VLLM_ALLOW_ENGINE_USE_RAY": "1"}
with RemoteOpenAIServer(MODEL_NAME, args,
env_dict=env_dict) as remote_server:
yield remote_server


Expand Down
6 changes: 6 additions & 0 deletions tests/spec_decode/e2e/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import os
from itertools import cycle
from typing import Dict, List, Optional, Sequence, Tuple, Union

Expand Down Expand Up @@ -56,6 +57,11 @@ def __init__(
) -> None:
if "disable_log_stats" not in kwargs:
kwargs["disable_log_stats"] = True

# Needed to engine_use_ray works as a deprecated feature,
# otherwise the following constructor will raise an exception
os.environ["VLLM_ALLOW_ENGINE_USE_RAY"] = "1"

engine_args = AsyncEngineArgs(
model=model,
tokenizer=tokenizer,
Expand Down
8 changes: 7 additions & 1 deletion vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,13 @@ def add_cli_args(parser: FlexibleArgumentParser,
parser.add_argument('--engine-use-ray',
action='store_true',
help='Use Ray to start the LLM engine in a '
'separate process as the server process.')
'separate process as the server process.'
'(DEPRECATED. This argument is deprecated '
'and will be removed in a future update. '
'Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to force '
'use it. See '
'https://github.com/vllm-project/vllm/issues/7045.'
')')
parser.add_argument('--disable-log-requests',
action='store_true',
help='Disable logging requests.')
Expand Down
15 changes: 15 additions & 0 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from vllm.sampling_params import SamplingParams
from vllm.sequence import ExecuteModelRequest, SamplerOutput
from vllm.usage.usage_lib import UsageContext
from vllm.utils import print_warning_once

logger = init_logger(__name__)
ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
Expand Down Expand Up @@ -510,6 +511,20 @@ def __init__(self,
self.log_requests = log_requests
self.engine = self._init_engine(*args, **kwargs)

if self.engine_use_ray:
print_warning_once(
"DEPRECATED. `--engine-use-ray` is deprecated and will "
"be removed in a future update. "
"See https://github.com/vllm-project/vllm/issues/7045.")

if envs.VLLM_ALLOW_ENGINE_USE_RAY:
print_warning_once(
"VLLM_ALLOW_ENGINE_USE_RAY is set, force engine use Ray")
else:
raise ValueError("`--engine-use-ray` is deprecated. "
"Set `VLLM_ALLOW_ENGINE_USE_RAY=1` to "
"force use it")

self.background_loop: Optional[asyncio.Future] = None
# We need to keep a reference to unshielded
# task as well to prevent it from being garbage
Expand Down
9 changes: 9 additions & 0 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
VERBOSE: bool = False
VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False
VLLM_TEST_FORCE_FP8_MARLIN: bool = False
VLLM_ALLOW_ENGINE_USE_RAY: bool = False
VLLM_PLUGINS: Optional[List[str]] = None


Expand Down Expand Up @@ -364,6 +365,14 @@ def get_default_config_root():
(os.environ.get("VLLM_TEST_FORCE_FP8_MARLIN", "0").strip().lower() in
("1", "true")),

# If set, allow running the engine as a separate ray actor,
# which is a deprecated feature soon to be removed.
# See https://github.com/vllm-project/vllm/issues/7045
"VLLM_ALLOW_ENGINE_USE_RAY":
lambda:
(os.environ.get("VLLM_ALLOW_ENGINE_USE_RAY", "0").strip().lower() in
("1", "true")),

# a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded
Expand Down

0 comments on commit 70b746e

Please sign in to comment.