From d67baf4de8fe3bab9cad453bb38c879b1767b57e Mon Sep 17 00:00:00 2001 From: Yuhong Wen Date: Wed, 20 Dec 2023 19:06:26 -0500 Subject: [PATCH] Fixed a race condition issue during the server start. (#2235) Co-authored-by: Chester Chen <512707+chesterxgchen@users.noreply.github.com> --- nvflare/private/fed/app/deployer/server_deployer.py | 2 ++ nvflare/private/fed/server/fed_server.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/nvflare/private/fed/app/deployer/server_deployer.py b/nvflare/private/fed/app/deployer/server_deployer.py index 9f21500a96..e800820497 100644 --- a/nvflare/private/fed/app/deployer/server_deployer.py +++ b/nvflare/private/fed/app/deployer/server_deployer.py @@ -22,6 +22,7 @@ from nvflare.private.fed.server.job_runner import JobRunner from nvflare.private.fed.server.run_manager import RunManager from nvflare.private.fed.server.server_cmd_modules import ServerCommandModules +from nvflare.private.fed.server.server_status import ServerStatus class ServerDeployer: @@ -121,6 +122,7 @@ def deploy(self, args): services.engine.fire_event(EventType.SYSTEM_BOOTSTRAP, fl_ctx) threading.Thread(target=self._start_job_runner, args=[job_runner, fl_ctx]).start() + services.status = ServerStatus.STARTED services.engine.fire_event(EventType.SYSTEM_START, fl_ctx) print("deployed FL server trainer.") diff --git a/nvflare/private/fed/server/fed_server.py b/nvflare/private/fed/server/fed_server.py index ebde004c52..d07dc99246 100644 --- a/nvflare/private/fed/server/fed_server.py +++ b/nvflare/private/fed/server/fed_server.py @@ -779,6 +779,9 @@ def _init_agent(self, args=None): return self.overseer_agent def _check_server_state(self, overseer_agent): + if self.status != ServerStatus.STARTED: + return + if overseer_agent.is_shutdown(): self.engine.shutdown_server() return