From e68b3be8adc927961d124ab193d6324dfc2a35b3 Mon Sep 17 00:00:00 2001 From: Jonathan Karlsen Date: Fri, 30 Aug 2024 10:20:08 +0200 Subject: [PATCH] Change driver `retries` param to `total_attempts` --- src/ert/scheduler/driver.py | 6 +++--- src/ert/scheduler/lsf_driver.py | 10 +++++----- src/ert/scheduler/openpbs_driver.py | 4 ++-- src/ert/scheduler/slurm_driver.py | 2 +- tests/unit_tests/scheduler/test_lsf_driver.py | 2 +- tests/unit_tests/scheduler/test_openpbs_driver.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ert/scheduler/driver.py b/src/ert/scheduler/driver.py index 75128f173dd..4d2f8f5b4e1 100644 --- a/src/ert/scheduler/driver.py +++ b/src/ert/scheduler/driver.py @@ -80,7 +80,7 @@ async def _execute_with_retry( retry_codes: Iterable[int] = (), accept_codes: Iterable[int] = (), stdin: Optional[bytes] = None, - retries: int = 1, + total_attempts: int = 1, retry_interval: float = 1.0, driverlogger: Optional[logging.Logger] = None, exit_on_msgs: Iterable[str] = (), @@ -89,7 +89,7 @@ async def _execute_with_retry( _logger = driverlogger or logging.getLogger(__name__) error_message: Optional[str] = None - for _ in range(retries): + for _ in range(total_attempts): process = await asyncio.create_subprocess_exec( *cmd_with_args, stdin=asyncio.subprocess.PIPE if stdin else None, @@ -139,7 +139,7 @@ async def _execute_with_retry( await asyncio.sleep(retry_interval) error_message = ( - f'Command "{shlex.join(cmd_with_args)}" failed after {retries} retries ' + f'Command "{shlex.join(cmd_with_args)}" failed after {total_attempts} attempts ' f"with {outputs}" ) _logger.error(error_message) diff --git a/src/ert/scheduler/lsf_driver.py b/src/ert/scheduler/lsf_driver.py index 66a424b7bba..ca0c0cb344f 100644 --- a/src/ert/scheduler/lsf_driver.py +++ b/src/ert/scheduler/lsf_driver.py @@ -332,7 +332,7 @@ async def submit( bsub_with_args, retry_on_empty_stdout=True, retry_codes=(FLAKY_SSH_RETURNCODE,), - retries=self._bsub_retries, + total_attempts=self._bsub_retries, retry_interval=self._sleep_time_between_cmd_retries, ) if not process_success: @@ -384,7 +384,7 @@ async def kill(self, iens: int) -> None: _, process_message = await self._execute_with_retry( bkill_with_args, retry_codes=(FLAKY_SSH_RETURNCODE,), - retries=3, + total_attempts=3, retry_interval=self._sleep_time_between_cmd_retries, exit_on_msgs=(JOB_ALREADY_FINISHED_BKILL_MSG), ) @@ -496,7 +496,7 @@ async def _get_exit_code(self, job_id: str) -> int: success, output = await self._execute_with_retry( [f"{self._bjobs_cmd}", "-o exit_code", "-noheader", f"{job_id}"], retry_codes=(FLAKY_SSH_RETURNCODE,), - retries=3, + total_attempts=3, retry_interval=self._sleep_time_between_cmd_retries, ) @@ -514,7 +514,7 @@ async def _get_exit_code_from_bhist(self, job_id: str) -> int: success, output = await self._execute_with_retry( [f"{self._bhist_cmd}", "-l", "-n2", f"{job_id}"], retry_codes=(FLAKY_SSH_RETURNCODE,), - retries=3, + total_attempts=3, retry_interval=self._sleep_time_between_cmd_retries, ) @@ -534,7 +534,7 @@ async def _log_bhist_job_summary(self, job_id: str) -> None: _, process_message = await self._execute_with_retry( bhist_with_args, retry_codes=(FLAKY_SSH_RETURNCODE,), - retries=3, + total_attempts=3, retry_interval=self._sleep_time_between_cmd_retries, log_to_debug=False, ) diff --git a/src/ert/scheduler/openpbs_driver.py b/src/ert/scheduler/openpbs_driver.py index 1bafef34b6c..8aa1ee7daf0 100644 --- a/src/ert/scheduler/openpbs_driver.py +++ b/src/ert/scheduler/openpbs_driver.py @@ -268,7 +268,7 @@ async def submit( QSUB_CONNECTION_REFUSED, ), stdin=script.encode(encoding="utf-8"), - retries=self._num_pbs_cmd_retries, + total_attempts=self._num_pbs_cmd_retries, retry_interval=self._sleep_time_between_cmd_retries, driverlogger=logger, ) @@ -298,7 +298,7 @@ async def kill(self, iens: int) -> None: [str(self._qdel_cmd), str(job_id)], retry_codes=(QDEL_REQUEST_INVALID,), accept_codes=(QDEL_JOB_HAS_FINISHED,), - retries=self._num_pbs_cmd_retries, + total_attempts=self._num_pbs_cmd_retries, retry_interval=self._sleep_time_between_cmd_retries, driverlogger=logger, ) diff --git a/src/ert/scheduler/slurm_driver.py b/src/ert/scheduler/slurm_driver.py index 73ac271fef3..bb9d9fae31a 100644 --- a/src/ert/scheduler/slurm_driver.py +++ b/src/ert/scheduler/slurm_driver.py @@ -209,7 +209,7 @@ async def submit( sbatch_with_args, retry_on_empty_stdout=True, retry_codes=(), - retries=self._sbatch_retries, + total_attempts=self._sbatch_retries, retry_interval=self._sleep_time_between_cmd_retries, ) if not process_success: diff --git a/tests/unit_tests/scheduler/test_lsf_driver.py b/tests/unit_tests/scheduler/test_lsf_driver.py index 3eaf6760055..aa8c3310d6b 100644 --- a/tests/unit_tests/scheduler/test_lsf_driver.py +++ b/tests/unit_tests/scheduler/test_lsf_driver.py @@ -560,7 +560,7 @@ async def test_that_bsub_will_retry_and_fail( driver._bsub_retries = 2 driver._sleep_time_between_cmd_retries = 0.2 match_str = ( - f'failed after 2 retries with exit code {exit_code}.*error: "{error_msg if error_msg else ""}"' + f'failed after 2 attempts with exit code {exit_code}.*error: "{error_msg if error_msg else ""}"' if exit_code != 199 else 'failed with exit code 199.*error: "Not recognized"' ) diff --git a/tests/unit_tests/scheduler/test_openpbs_driver.py b/tests/unit_tests/scheduler/test_openpbs_driver.py index 39cc7ab5b76..2ad8821c1a2 100644 --- a/tests/unit_tests/scheduler/test_openpbs_driver.py +++ b/tests/unit_tests/scheduler/test_openpbs_driver.py @@ -393,7 +393,7 @@ async def test_that_qsub_will_retry_and_fail( driver._num_pbs_cmd_retries = 2 driver._sleep_time_between_cmd_retries = 0.2 match_str = ( - f'failed after 2 retries with exit code {exit_code}.*error: "{error_msg}"' + f'failed after 2 attempts with exit code {exit_code}.*error: "{error_msg}"' if exit_code != 199 else 'failed with exit code 199.*error: "Not recognized"' )