v1: use multiprocessing by default
Previously, this code forced the use of the `spawn` multiprocessing
method. Since we know this causes problems in some configurations,
multiprocessing was off by default.

This change turns it on by default and makes use of existing code that
tries to choose the best multiprocessing method based on what we can
detect.

- use `fork` by default
- use `spawn` if CUDA has already been initialized, but give a warning

This same logic is already in use for spawning multiple workers for v1
tensor parallelism support.

The design doc `docs/design/multiprocessing.md` covers this topic in
more detail.
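
For illustration, a minimal sketch of that selection heuristic. The helper
name and warning text below are illustrative only, and the "fork" fallback
default is assumed; the real logic lives in
`vllm/executor/multiproc_worker_utils.py` and appears in the diff further down.

import multiprocessing
import os
import warnings

import torch


def pick_start_method() -> str:
    # Hypothetical helper mirroring the heuristic described above: prefer
    # "fork", but fall back to "spawn" once CUDA has been initialized in
    # the parent process, since forking after CUDA init is unsafe.
    method = os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", "fork")
    if torch.cuda.is_initialized() and method != "spawn":
        warnings.warn("CUDA already initialized; falling back to the "
                      "'spawn' multiprocessing start method.")
        method = "spawn"
    return method


ctx = multiprocessing.get_context(pick_start_method())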

Signed-off-by: Russell Bryant <[email protected]>
russellb committed Dec 10, 2024
1 parent ec4477c commit be8449c
Showing 3 changed files with 15 additions and 14 deletions.
4 changes: 2 additions & 2 deletions vllm/envs.py
@@ -68,7 +68,7 @@
     VLLM_SKIP_P2P_CHECK: bool = False
     VLLM_DISABLED_KERNELS: List[str] = []
     VLLM_USE_V1: bool = False
-    VLLM_ENABLE_V1_MULTIPROCESSING: bool = False
+    VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
     VLLM_LOG_BATCHSIZE_INTERVAL: float = -1

@@ -452,7 +452,7 @@ def get_default_config_root():

     # If set, enable multiprocessing in LLM for the V1 code path.
     "VLLM_ENABLE_V1_MULTIPROCESSING":
-    lambda: bool(int(os.getenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0"))),
+    lambda: bool(int(os.getenv("VLLM_ENABLE_V1_MULTIPROCESSING", "1"))),
     "VLLM_LOG_BATCHSIZE_INTERVAL":
     lambda: float(os.getenv("VLLM_LOG_BATCHSIZE_INTERVAL", "-1")),
 }
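
As a quick check on how the flag above is parsed (standard os.getenv
semantics, nothing vLLM-specific assumed), an unset variable now defaults to
"1", and V1 multiprocessing can still be disabled by exporting
VLLM_ENABLE_V1_MULTIPROCESSING=0 before launching:

import os

# Mirrors the lambda in envs.py: unset or "1" -> True, "0" -> False.
enabled = bool(int(os.getenv("VLLM_ENABLE_V1_MULTIPROCESSING", "1")))
print("V1 multiprocessing enabled:", enabled)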
17 changes: 11 additions & 6 deletions vllm/executor/multiproc_worker_utils.py
@@ -274,7 +274,17 @@ def write_with_prefix(s: str):
     file.write = write_with_prefix  # type: ignore[method-assign]


+def _check_multiproc_method():
+    if (cuda_is_initialized()
+            and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") != "spawn"):
+        logger.warning("CUDA was previously initialized. We must use "
+                       "the `spawn` multiprocessing start method. Setting "
+                       "VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
+        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
+
 def get_mp_context():
+    _check_multiproc_method()
     mp_method = envs.VLLM_WORKER_MULTIPROC_METHOD
     return multiprocessing.get_context(mp_method)


@@ -284,12 +294,7 @@ def set_multiprocessing_worker_envs(parallel_config):
     in a multiprocessing environment. This should be called by the parent
     process before worker processes are created"""

-    if (cuda_is_initialized()
-            and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") != "spawn"):
-        logger.warning("CUDA was previously initialized. We must use "
-                       "the `spawn` multiprocessing start method. Setting "
-                       "VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
-        os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+    _check_multiproc_method()

     # Configure thread parallelism if OMP_NUM_THREADS isn't set
     #
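
A hedged usage sketch of get_mp_context() from the caller's side (worker_main
and the two-process count are placeholders, not vLLM APIs): the returned
context behaves like the multiprocessing module itself, so Process/Queue code
works the same whichever start method was selected.

from vllm.executor.multiproc_worker_utils import get_mp_context


def worker_main(rank: int) -> None:
    # Placeholder worker entry point for the sketch.
    print(f"worker {rank} started")


if __name__ == "__main__":
    ctx = get_mp_context()  # "fork" normally, "spawn" if CUDA is already up
    procs = [ctx.Process(target=worker_main, args=(i,)) for i in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()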
8 changes: 2 additions & 6 deletions vllm/v1/engine/core.py
@@ -1,4 +1,3 @@
-import multiprocessing
 import pickle
 import queue
 import signal

@@ -13,6 +12,7 @@
 from msgspec import msgpack

 from vllm.config import CacheConfig, VllmConfig
+from vllm.executor.multiproc_worker_utils import get_mp_context
 from vllm.logger import init_logger
 from vllm.usage.usage_lib import UsageContext
 from vllm.v1.core.scheduler import Scheduler

@@ -197,11 +197,7 @@ def make_engine_core_process(
         ready_path: str,
         should_shutdown: Synchronized,
     ) -> BaseProcess:
-        # The current process might have CUDA context,
-        # so we need to spawn a new process.
-        # NOTE(rob): this is a problem for using EngineCoreProc w/
-        # LLM, since we need a if __name__ == "__main__" guard.
-        context = multiprocessing.get_context("spawn")
+        context = get_mp_context()

        process_kwargs = {
            "input_path": input_path,
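
One practical consequence of this switch, noted here as a usage example
(standard Python multiprocessing behavior, not something this diff adds):
with "spawn" the child re-imports the entry module, so process creation must
sit behind an if __name__ == "__main__" guard, which is the constraint the
removed NOTE referred to; "fork" does not re-import the module, so plain
scripts keep working without it. A minimal illustration:

import multiprocessing


def child() -> None:
    print("child running")


if __name__ == "__main__":
    # Required when the start method is "spawn" (the child re-imports this
    # module); harmless and still good practice when it is "fork".
    ctx = multiprocessing.get_context("spawn")
    p = ctx.Process(target=child)
    p.start()
    p.join()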
