Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ert queue options #9486

Merged
merged 3 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 5 additions & 9 deletions src/ert/config/queue_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def driver_options(self) -> dict[str, Any]:

@pydantic.dataclasses.dataclass
class LocalQueueOptions(QueueOptions):
name: Literal[QueueSystem.LOCAL] = QueueSystem.LOCAL
name: Literal[QueueSystem.LOCAL, "local"] = "local"

@property
def driver_options(self) -> dict[str, Any]:
Expand All @@ -102,7 +102,7 @@ def driver_options(self) -> dict[str, Any]:

@pydantic.dataclasses.dataclass
class LsfQueueOptions(QueueOptions):
name: Literal[QueueSystem.LSF] = QueueSystem.LSF
name: Literal[QueueSystem.LSF, "lsf"] = "lsf"
bhist_cmd: NonEmptyString | None = None
bjobs_cmd: NonEmptyString | None = None
bkill_cmd: NonEmptyString | None = None
Expand All @@ -125,7 +125,7 @@ def driver_options(self) -> dict[str, Any]:

@pydantic.dataclasses.dataclass
class TorqueQueueOptions(QueueOptions):
name: Literal[QueueSystem.TORQUE] = QueueSystem.TORQUE
name: Literal[QueueSystem.TORQUE, "torque"] = "torque"
qsub_cmd: NonEmptyString | None = None
qstat_cmd: NonEmptyString | None = None
qdel_cmd: NonEmptyString | None = None
Expand All @@ -146,7 +146,7 @@ def driver_options(self) -> dict[str, Any]:

@pydantic.dataclasses.dataclass
class SlurmQueueOptions(QueueOptions):
name: Literal[QueueSystem.SLURM] = QueueSystem.SLURM
name: Literal[QueueSystem.SLURM, "slurm"] = "slurm"
sbatch: NonEmptyString = "sbatch"
scancel: NonEmptyString = "scancel"
scontrol: NonEmptyString = "scontrol"
Expand Down Expand Up @@ -258,7 +258,6 @@ class QueueConfig:
queue_options: (
LsfQueueOptions | TorqueQueueOptions | SlurmQueueOptions | LocalQueueOptions
) = pydantic.Field(default_factory=LocalQueueOptions, discriminator="name")
queue_options_test_run: LocalQueueOptions = field(default_factory=LocalQueueOptions)
stop_long_running: bool = False
max_runtime: int | None = None
preferred_num_cpu: int = 1
Expand Down Expand Up @@ -311,7 +310,6 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
)

queue_options = all_validated_queue_options[selected_queue_system]
queue_options_test_run = all_validated_queue_options[QueueSystem.LOCAL]
queue_options.add_global_queue_options(config_dict)

if queue_options.project_code is None:
Expand All @@ -329,7 +327,6 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
max_submit,
selected_queue_system,
queue_options,
queue_options_test_run,
stop_long_running=bool(stop_long_running),
max_runtime=config_dict.get(ConfigKeys.MAX_RUNTIME),
preferred_num_cpu=preferred_num_cpu,
Expand All @@ -341,8 +338,7 @@ def create_local_copy(self) -> QueueConfig:
self.realization_memory,
self.max_submit,
QueueSystem.LOCAL,
self.queue_options_test_run,
self.queue_options_test_run,
LocalQueueOptions(max_running=self.max_running),
stop_long_running=bool(self.stop_long_running),
max_runtime=self.max_runtime,
)
Expand Down
3 changes: 2 additions & 1 deletion src/ert/gui/simulation/experiment_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
)
from ert.trace import get_trace_id

from ...config.queue_config import LocalQueueOptions
from ..summarypanel import SummaryPanel
from .combobox_with_description import QComboBoxWithDescription
from .ensemble_experiment_panel import EnsembleExperimentPanel
Expand Down Expand Up @@ -376,7 +377,7 @@ def populate_clipboard_debug_info(self) -> None:
queue_opts = self.config.queue_config.queue_options

if isinstance(self.get_current_experiment_type(), SingleTestRun):
queue_opts = self.config.queue_config.queue_options_test_run
queue_opts = LocalQueueOptions(max_running=1)

for field in fields(queue_opts):
field_value = getattr(queue_opts, field.name)
Expand Down
32 changes: 16 additions & 16 deletions src/ert/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,23 @@


def create_driver(queue_options: QueueOptions) -> Driver:
if queue_options.name == QueueSystem.LOCAL:
return LocalDriver()
elif queue_options.name == QueueSystem.TORQUE:
return OpenPBSDriver(**queue_options.driver_options)
elif queue_options.name == QueueSystem.LSF:
return LsfDriver(**queue_options.driver_options)
elif queue_options.name == QueueSystem.SLURM:
return SlurmDriver(
**dict(
{"user": getpwuid(getuid()).pw_name},
**queue_options.driver_options,
match str(queue_options.name).upper():
case QueueSystem.LOCAL:
return LocalDriver()
case QueueSystem.TORQUE:
return OpenPBSDriver(**queue_options.driver_options)
case QueueSystem.LSF:
return LsfDriver(**queue_options.driver_options)
case QueueSystem.SLURM:
return SlurmDriver(
**dict(
{"user": getpwuid(getuid()).pw_name},
**queue_options.driver_options,
)
)
)
else:
raise NotImplementedError(
"Only LOCAL, SLURM, TORQUE and LSF drivers are implemented"
)
raise NotImplementedError(
"Only LOCAL, SLURM, TORQUE and LSF drivers are implemented"
)


__all__ = [
Expand Down
24 changes: 22 additions & 2 deletions src/everest/config/everest_config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
from argparse import ArgumentParser
from copy import copy
from functools import cached_property
from io import StringIO
from itertools import chain
Expand Down Expand Up @@ -215,7 +216,7 @@ class EverestConfig(BaseModelWithPropertySupport): # type: ignore
""",
)
server: ServerConfig | None = Field(
default=None,
default_factory=ServerConfig,
description="""Defines Everest server settings, i.e., which queue system,
queue name and queue options are used for the everest server.
The main reason for changing this section is situations where everest
Expand Down Expand Up @@ -250,6 +251,25 @@ class EverestConfig(BaseModelWithPropertySupport): # type: ignore
config_path: Path = Field()
model_config = ConfigDict(extra="forbid")

@model_validator(mode="after")
def validate_queue_system(self) -> Self:  # pylint: disable=E0213
    """Reconcile the server queue system with the simulator queue system.

    When no server section (or no server queue system) is configured, the
    server receives a shallow copy of the simulator's queue options. A
    simulator running on the local queue combined with a server on any
    other queue is rejected. Finally the server queue's ``max_running`` is
    set to 1.

    Returns:
        The validated model instance (``self``).

    Raises:
        ValueError: If the simulator uses the local queue while the server
            is configured with a different queue system.
    """
    simulator_queue = self.simulator.queue_system

    # Fill in whatever part of the server configuration is missing; a copy
    # is used so the max_running override below does not touch the
    # simulator's own options object.
    if self.server is None:
        self.server = ServerConfig(queue_system=copy(simulator_queue))
    elif self.server.queue_system is None:
        self.server.queue_system = copy(simulator_queue)

    # Names are compared case-insensitively via their string form.
    simulator_name = str(simulator_queue.name).lower()
    if simulator_name == "local":
        server_name = str(self.server.queue_system.name).lower()
        if server_name != simulator_name:
            raise ValueError(
                f"The simulator is using local as queue system "
                f"while the everest server is using {self.server.queue_system.name}. "
                f"If the simulator is using local, so must the everest server."
            )

    self.server.queue_system.max_running = 1
    return self

@model_validator(mode="after")
def validate_forward_model_job_name_installed(self) -> Self: # pylint: disable=E0213
install_jobs = self.install_jobs
Expand Down Expand Up @@ -745,7 +765,7 @@ def with_defaults(cls, **kwargs):
"model": {"realizations": [0]},
}

return EverestConfig.model_validate({**defaults, **kwargs})
return cls.model_validate({**defaults, **kwargs})

@staticmethod
def lint_config_dict(config: dict) -> list["ErrorDetails"]:
Expand Down
13 changes: 0 additions & 13 deletions src/everest/config/has_ert_queue_options.py

This file was deleted.

77 changes: 40 additions & 37 deletions src/everest/config/server_config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import json
import os
from typing import Literal
from typing import Any

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

from ert.config.queue_config import (
LocalQueueOptions,
LsfQueueOptions,
SlurmQueueOptions,
TorqueQueueOptions,
)
from ert.plugins import ErtPluginManager

from ..strings import (
CERTIFICATE_DIR,
Expand All @@ -11,46 +19,41 @@
SERVER_STATUS,
SESSION_DIR,
)
from .has_ert_queue_options import HasErtQueueOptions


class ServerConfig(BaseModel, HasErtQueueOptions): # type: ignore
name: str | None = Field(
None,
description="""Specifies which queue to use.

Examples are
* mr
* bigmem

The everest server generally has lower resource requirements than forward models such
as RMS and Eclipse.
""",
) # Corresponds to queue name
exclude_host: str | None = Field(
"",
description="""Comma separated list of nodes that should be
excluded from the slurm run""",
)
include_host: str | None = Field(
"",
description="""Comma separated list of nodes that
should be included in the slurm run""",
)
options: str | None = Field(
None,
description="""Used to specify options to LSF.
Examples to set memory requirement is:
* rusage[mem=1000]""",
)
queue_system: Literal["lsf", "local", "slurm"] | None = Field(
None,
description="Defines which queue system the everest server runs on.",
from .simulator_config import check_removed_config


class ServerConfig(BaseModel): # type: ignore
queue_system: (
LocalQueueOptions
| LsfQueueOptions
| SlurmQueueOptions
| TorqueQueueOptions
| None
) = Field(
default=None,
description="Defines which queue system the everest submits jobs to",
discriminator="name",
)
model_config = ConfigDict(
extra="forbid",
)

@field_validator("queue_system", mode="before")
@classmethod
def default_local_queue(cls, v: Any) -> Any:
    """Inject the plugin-provided activate script into raw queue options.

    Runs before pydantic validates ``queue_system``. If the raw value is a
    dict without an ``"activate_script"`` key and
    ``ErtPluginManager().activate_script()`` returns a truthy value, that
    value is stored under ``"activate_script"`` in the dict.

    Args:
        v: The raw ``queue_system`` value as supplied by the caller.

    Returns:
        ``v`` itself; only dict inputs are ever modified (in place).
    """
    # None and anything that is not a plain mapping (e.g. an already
    # constructed queue-options object) is passed through untouched so that
    # pydantic reports a proper validation error instead of this hook
    # failing on the membership test / item assignment.
    if not isinstance(v, dict) or "activate_script" in v:
        return v

    # Ask the plugin manager once and reuse the answer.
    activate_script = ErtPluginManager().activate_script()
    if activate_script:
        v["activate_script"] = activate_script
    return v

@model_validator(mode="before")
@classmethod
def check_old_config(cls, data: Any) -> Any:
    """Run the removed-config check on the raw ``queue_system`` entry.

    Executes before field validation. Only dict input is inspected: its
    ``"queue_system"`` value (``None`` when absent) is handed to
    ``check_removed_config``. The input is always returned unchanged.

    Args:
        data: The raw data the model is being validated from.

    Returns:
        ``data`` unchanged.
    """
    if not isinstance(data, dict):
        return data

    # NOTE(review): presumably check_removed_config raises for legacy keys
    # that are no longer supported - confirm in simulator_config.
    check_removed_config(data.get("queue_system"))
    return data

@staticmethod
def get_server_url(output_dir: str) -> str:
"""Return the url of the server.
Expand Down
Loading
Loading