diff --git a/neps/exceptions.py b/neps/exceptions.py index 597dfb1f..7054d7c6 100644 --- a/neps/exceptions.py +++ b/neps/exceptions.py @@ -45,3 +45,10 @@ class TrialNotFoundError(VersionedResourceDoesNotExistsError): class WorkerFailedToGetPendingTrialsError(NePSError): """Raised when a worker failed to get pending trials.""" + + +class WorkerRaiseError(NePSError): + """Raised from a worker when an error is raised. + + Includes additional information on how to recover + """ diff --git a/neps/runtime.py b/neps/runtime.py index c9988f70..5cf0f29f 100644 --- a/neps/runtime.py +++ b/neps/runtime.py @@ -26,6 +26,7 @@ NePSError, VersionMismatchError, WorkerFailedToGetPendingTrialsError, + WorkerRaiseError, ) from neps.state._eval import evaluate_trial from neps.state.filebased import create_or_load_filebased_neps_state @@ -205,15 +206,27 @@ def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 OnErrorPossibilities.STOP_WORKER_ERROR, OnErrorPossibilities.STOP_ANY_ERROR, ): + msg = ( + "Error occurred while evaluating a configuration with this worker and" + f" the worker is set to stop with {self.settings.on_error}." + "\n" + "\n" + "If this was a bug in the evaluation code while you were developing your" + " pipeline and you have set ignore_errors=True, please delete" + " your results folder and fix the error before re-running." + "\n" + "If this is an issue specifically with the configuration, considering" + " setting `ignore_errors=False` to allow the worker to continue" + " evaluating other configurations, even if this one failed." + "\n" + "\n" + ) if self.settings.on_error in ( OnErrorPossibilities.RAISE_WORKER_ERROR, OnErrorPossibilities.RAISE_ANY_ERROR, ): - raise error_from_this_worker - return ( - "Error occurred while evaluating a configuration with this worker and" - f" the worker is set to stop with {self.settings.on_error}." - ) + raise WorkerRaiseError(msg) from error_from_this_worker + return msg if ( self.settings.max_evaluations_for_worker is not None @@ -265,14 +278,23 @@ def _check_if_should_stop( # noqa: C901, PLR0912, PLR0911 ): err = self.state._shared_errors.synced().latest_err_as_raisable() if err is not None: - if self.settings.on_error == OnErrorPossibilities.RAISE_ANY_ERROR: - raise err - - return ( + msg = ( "An error occurred in another worker and this worker is set to stop" f" with {self.settings.on_error}." - "\n To allow more evaluations, use a different stopping criterion." + "\n" + "If this was a bug in the evaluation code while you were developing" + " your pipeline and you have set ignore_errors=True, please delete" + " your results folder and fix the error before re-running." + "\n" + "If this is an issue specifically with the configuration, considering" + " setting `ignore_errors=False` to allow the worker to continue" + " evaluating other configurations, even if any worker fails." + "\n" ) + if self.settings.on_error == OnErrorPossibilities.RAISE_ANY_ERROR: + raise WorkerRaiseError(msg) from err + + return msg # If there are no global stopping criterion, we can no just return early. if (