Commit

Remove deprecated torque option MEMORY_PER_JOB
jonathan-eq committed Dec 17, 2024
1 parent 726e50f commit 51cd724
Showing 12 changed files with 17 additions and 176 deletions.
4 changes: 2 additions & 2 deletions docs/ert/reference/configuration/keywords.rst
@@ -1825,8 +1825,8 @@ in :ref:`queue-system-chapter`. In brief, the queue systems have the following options:
``BHIST_CMD``, ``SUBMIT_SLEEP``, ``PROJECT_CODE``, ``EXCLUDE_HOST``,
``MAX_RUNNING``
* :ref:`TORQUE <pbs-systems>` — ``QSUB_CMD``, ``QSTAT_CMD``, ``QDEL_CMD``,
``QUEUE``, ``CLUSTER_LABEL``, ``MAX_RUNNING``,
``MEMORY_PER_JOB``, ``KEEP_QSUB_OUTPUT``, ``SUBMIT_SLEEP``
``QUEUE``, ``CLUSTER_LABEL``, ``MAX_RUNNING``, ``KEEP_QSUB_OUTPUT``,
``SUBMIT_SLEEP``
* :ref:`SLURM <slurm-systems>` — ``SBATCH``, ``SCANCEL``, ``SCONTROL``, ``SACCT``,
``SQUEUE``, ``PARTITION``, ``SQUEUE_TIMEOUT``, ``MAX_RUNTIME``, ``INCLUDE_HOST``,
``EXCLUDE_HOST``, ``MAX_RUNNING``
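With ``MEMORY_PER_JOB`` gone from the TORQUE list above, per-job memory
is requested through the general ``REALIZATION_MEMORY`` keyword instead.
A minimal migration sketch, assuming a TORQUE setup (the sizes and
``MAX_RUNNING`` value are placeholders)::

    NUM_REALIZATIONS 10
    QUEUE_SYSTEM TORQUE
    QUEUE_OPTION TORQUE MAX_RUNNING 50
    REALIZATION_MEMORY 16Gb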
21 changes: 0 additions & 21 deletions docs/ert/reference/configuration/queue.rst
@@ -277,27 +277,6 @@ The following is a list of all queue-specific configuration options:

If ``n`` is zero (the default), then it is set to the number of realizations.

.. _torque_memory_per_job:
.. topic:: MEMORY_PER_JOB

You can specify the amount of memory you will need for running your
job. This ensures that not too many jobs run on a single shared-memory
node at once, which could otherwise crash the compute node by
exhausting its memory.

You can get an indication of the memory requirement by monitoring a
local run with the ``htop`` utility. Whether you should set the peak
memory usage as your requirement, or a lower figure, depends on how
many jobs are likely to hit their peak memory usage simultaneously.

The value supplied is passed as a string straight into the ``qsub``
memory argument. You must specify the unit, either ``gb`` or ``mb``, as in
the example::

QUEUE_OPTION TORQUE MEMORY_PER_JOB 16gb

By default, this value is not set.
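Both the removed option and its replacement end up as a ``qsub``
``-l mem=...`` resource request. A sketch of the correspondence, assuming
the byte-to-megabyte conversion visible in the driver hunk further down
(``realization_memory // 1024**2``)::

    QUEUE_OPTION TORQUE MEMORY_PER_JOB 16gb    ->  qsub ... -l mem=16gb
    REALIZATION_MEMORY 16Gb                    ->  qsub ... -l mem=16384mb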

.. _torque_keep_qsub_output:
.. topic:: KEEP_QSUB_OUTPUT

9 changes: 0 additions & 9 deletions docs/everest/config_generated.rst
@@ -1043,15 +1043,6 @@ Simulation settings
The name of the cluster you are running simulations in.


**memory_per_job (optional)**
Type: *Optional[str]*

You can specify the amount of memory you will need for running your job. This ensures that not too many jobs run on a single shared-memory node at once, which could otherwise crash the compute node by exhausting its memory.
You can get an indication of the memory requirement by monitoring a local run with the htop utility. Whether you should set the peak memory usage as your requirement, or a lower figure, depends on how many jobs are likely to hit their peak memory usage simultaneously.
The value supplied is passed as a string straight into the qsub argument. You must specify the unit, either gb or mb.



**keep_qsub_output (optional)**
Type: *Optional[int]*

6 changes: 0 additions & 6 deletions src/ert/config/parsing/config_schema_deprecations.py
@@ -181,12 +181,6 @@
"for the Ensemble Smoother update algorithm. "
"Please use ENKF_ALPHA and STD_CUTOFF keywords instead.",
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="MEMORY_PER_JOB as QUEUE_OPTION to TORQUE is deprecated and will be removed in "
"the future. Replace by REALIZATION_MEMORY.",
check=lambda line: "MEMORY_PER_JOB" in line,
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="Memory requirements in LSF should now be set using REALIZATION_MEMORY and not"
19 changes: 0 additions & 19 deletions src/ert/config/queue_config.py
@@ -126,7 +126,6 @@ class TorqueQueueOptions(QueueOptions):
qstat_cmd: NonEmptyString | None = None
qdel_cmd: NonEmptyString | None = None
queue: NonEmptyString | None = None
memory_per_job: NonEmptyString | None = None
cluster_label: NonEmptyString | None = None
job_prefix: NonEmptyString | None = None
keep_qsub_output: bool = False
@@ -140,13 +139,6 @@ def driver_options(self) -> dict[str, Any]:
driver_dict.pop("submit_sleep")
return driver_dict

@pydantic.field_validator("memory_per_job")
@classmethod
def check_memory_per_job(cls, value: str | None) -> str | None:
if not torque_memory_usage_format.validate(value):
raise ValueError("wrong memory format")
return value


@pydantic.dataclasses.dataclass
class SlurmQueueOptions(QueueOptions):
@@ -315,17 +307,6 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
if tags:
queue_options.project_code = "+".join(tags)

for _queue_vals in all_validated_queue_options.values():
if (
isinstance(_queue_vals, TorqueQueueOptions)
and _queue_vals.memory_per_job
and realization_memory
):
_throw_error_or_warning(
"Do not specify both REALIZATION_MEMORY and TORQUE option MEMORY_PER_JOB",
"MEMORY_PER_JOB",
selected_queue_system == QueueSystem.TORQUE,
)

return QueueConfig(
job_script,
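The deleted validator relied on ``torque_memory_usage_format``, which is
not shown in this diff. A hedged reconstruction of what it accepted,
inferred from the deleted tests further down ("5gb", "5mb" and "5kb"
pass; "5", "5g", "gb", "mb" and "1 gb" fail); the exact suffix set in
ert may have been wider:

    import re

    # digits followed immediately by a unit suffix, no spaces
    TORQUE_MEMORY_RE = re.compile(r"[0-9]+(kb|mb|gb)")

    def validate_torque_memory(value: str | None) -> bool:
        return value is None or TORQUE_MEMORY_RE.fullmatch(value) is not None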
9 changes: 0 additions & 9 deletions src/ert/scheduler/openpbs_driver.py
@@ -124,7 +124,6 @@ def __init__(
queue_name: str | None = None,
project_code: str | None = None,
keep_qsub_output: bool | None = None,
memory_per_job: str | None = None,
cluster_label: str | None = None,
job_prefix: str | None = None,
qsub_cmd: str | None = None,
@@ -137,7 +136,6 @@
self._queue_name = queue_name
self._project_code = project_code
self._keep_qsub_output = keep_qsub_output
self._memory_per_job = memory_per_job
self._cluster_label: str | None = cluster_label
self._job_prefix = job_prefix
self._max_pbs_cmd_attempts = 10
@@ -162,13 +160,6 @@ def _build_resource_string(
cpu_resources: list[str] = []
if num_cpu > 1:
cpu_resources += [f"ncpus={num_cpu}"]
if self._memory_per_job is not None and realization_memory > 0:
raise ValueError(
"Overspecified memory pr job. "
"Do not specify both memory_per_job and realization_memory"
)
if self._memory_per_job is not None:
cpu_resources += [f"mem={self._memory_per_job}"]
elif realization_memory > 0:
cpu_resources += [f"mem={realization_memory // 1024**2 }mb"]
if cpu_resources:
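A minimal sketch of the surviving memory path, using only what the hunk
above shows: ``realization_memory`` arrives in bytes and is floor-divided
into whole megabytes for ``qsub`` (how the pieces are joined into the
final ``-l`` argument is abbreviated here):

    def memory_resource(realization_memory: int) -> list[str]:
        # bytes -> whole megabytes, emitted only for a positive request
        if realization_memory > 0:
            return [f"mem={realization_memory // 1024**2}mb"]
        return []

    assert memory_resource(16 * 1024**3) == ["mem=16384mb"]  # 16 GiB
    assert memory_resource(1024**2) == ["mem=1mb"]
    assert memory_resource(0) == []  # no mem request at all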
7 changes: 0 additions & 7 deletions src/everest/config/simulator_config.py
@@ -123,13 +123,6 @@ class SimulatorConfig(BaseModel, HasErtQueueOptions, extra="forbid"): # type: ignore
default=None,
description="The name of the cluster you are running simulations in.",
)
memory_per_job: str | None = Field(
default=None,
description="""You can specify the amount of memory you will need for running your job. This will ensure that not too many jobs will run on a single shared memory node at once, possibly crashing the compute node if it runs out of memory.
You can get an indication of the memory requirement by watching the course of a local run using the htop utility. Whether you should set the peak memory usage as your requirement or a lower figure depends on how simultaneously each job will run.
The option to be supplied will be used as a string in the qsub argument. You must specify the unit, either gb or mb.
""",
)
keep_qsub_output: int | None = Field(
default=0,
description="Set to 1 to keep error messages from qsub. Usually only to be used if somethign is seriously wrong with the queue environment/setup.",
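Because ``SimulatorConfig`` is declared with ``extra="forbid"``, dropping
the field turns any leftover ``memory_per_job`` key into a validation
error rather than a silently ignored option. A sketch of the effect
(import path as in this diff):

    from everest.config.simulator_config import SimulatorConfig

    SimulatorConfig(keep_qsub_output=1)      # still accepted
    SimulatorConfig(memory_per_job="16gb")   # now raises pydantic.ValidationError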
1 change: 0 additions & 1 deletion src/everest/config_keys.py
@@ -123,7 +123,6 @@ class ConfigKeys:
TORQUE_QDEL_CMD = "qdel_cmd"
TORQUE_QUEUE_NAME = "name"
TORQUE_CLUSTER_LABEL = "cluster_label"
TORQUE_MEMORY_PER_JOB = "memory_per_job"
TORQUE_KEEP_QSUB_OUTPUT = "keep_qsub_output"
TORQUE_SUBMIT_SLEEP = "submit_sleep"
TORQUE_PROJECT_CODE = "project_code"
4 changes: 1 addition & 3 deletions src/everest/queue_driver/queue_driver.py
@@ -32,9 +32,7 @@
(ConfigKeys.TORQUE_QDEL_CMD, "QDEL_CMD"),
(ConfigKeys.TORQUE_QUEUE_NAME, "QUEUE"),
(ConfigKeys.TORQUE_CLUSTER_LABEL, "CLUSTER_LABEL"),
(ConfigKeys.CORES_PER_NODE, "NUM_CPU")(
ConfigKeys.TORQUE_MEMORY_PER_JOB, "MEMORY_PER_JOB"
),
(ConfigKeys.CORES_PER_NODE, "NUM_CPU"),
(ConfigKeys.TORQUE_KEEP_QSUB_OUTPUT, "KEEP_QSUB_OUTPUT"),
(ConfigKeys.TORQUE_SUBMIT_SLEEP, "SUBMIT_SLEEP"),
(ConfigKeys.TORQUE_PROJECT_CODE, "PROJECT_CODE"),
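As rendered, the three removed lines form a single expression that
"calls" a tuple: a missing trailing comma had fused two ``(key, option)``
pairs into ``(...)(...)``, which would raise ``TypeError`` if evaluated.
The surviving single-entry line also repairs that formatting accident.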
76 changes: 0 additions & 76 deletions tests/ert/unit_tests/config/test_queue_config.py
@@ -154,44 +154,6 @@ def test_invalid_realization_memory(invalid_memory_spec: str):
)


def test_conflicting_realization_openpbs_memory_per_job():
with (
pytest.raises(ConfigValidationError),
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"REALIZATION_MEMORY 10Mb\n"
"QUEUE_SYSTEM TORQUE\n"
"QUEUE_OPTION TORQUE MEMORY_PER_JOB 20mb\n"
)


def test_conflicting_realization_openpbs_memory_per_job_but_slurm_activated_only_warns():
with pytest.warns(ConfigWarning):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"REALIZATION_MEMORY 10Mb\n"
"QUEUE_SYSTEM SLURM\n"
"QUEUE_OPTION TORQUE MEMORY_PER_JOB 20mb\n"
)


@pytest.mark.parametrize("torque_memory_with_unit_str", ["gb", "mb", "1 gb"])
def test_that_invalid_memory_pr_job_raises_validation_error(
torque_memory_with_unit_str,
):
with (
pytest.raises(ConfigValidationError),
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM TORQUE\n"
f"QUEUE_OPTION TORQUE MEMORY_PER_JOB {torque_memory_with_unit_str}"
)


@pytest.mark.parametrize(
"queue_system, queue_system_option",
[("LSF", "LSF_QUEUE"), ("SLURM", "SQUEUE"), ("TORQUE", "QUEUE")],
@@ -271,44 +233,6 @@ def test_that_configuring_another_queue_system_gives_warning():
)


@pytest.mark.parametrize(
"mem_per_job",
["5gb", "5mb", "5kb"],
)
def test_that_valid_torque_queue_mem_options_are_ok(mem_per_job):
with pytest.warns(ConfigWarning, match="deprecated"):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM SLURM\n"
f"QUEUE_OPTION TORQUE MEMORY_PER_JOB {mem_per_job}\n"
)


@pytest.mark.parametrize(
"mem_per_job",
["5", "5g"],
)
def test_that_torque_queue_mem_options_are_corrected(mem_per_job: str):
with (
pytest.raises(ConfigValidationError) as e,
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM TORQUE\n"
f"QUEUE_OPTION TORQUE MEMORY_PER_JOB {mem_per_job}\n"
)

info = e.value.errors[0]

assert (
f"Value error, wrong memory format. Got input '{mem_per_job}'." in info.message
)
assert info.line == 3
assert info.column == 36
assert info.end_column == info.column + len(mem_per_job)


def test_max_running_property():
config = ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
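The deleted TORQUE memory tests have no direct replacement in this file;
memory specifications are now exercised through ``REALIZATION_MEMORY``
(see ``test_invalid_realization_memory`` in the hunk context above). A
hedged sketch of the surviving configuration path, reusing the
file-contents style of the deleted tests:

    ErtConfig.from_file_contents(
        "NUM_REALIZATIONS 1\n"
        "QUEUE_SYSTEM TORQUE\n"
        "REALIZATION_MEMORY 16Gb\n"
    )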
34 changes: 14 additions & 20 deletions tests/ert/unit_tests/scheduler/test_openpbs_driver.py
@@ -137,24 +137,24 @@ def parse_resource_string(qsub_args: str) -> dict[str, str]:


@pytest.mark.usefixtures("capturing_qsub")
async def test_memory_per_job():
driver = OpenPBSDriver(memory_per_job="10gb")
await driver.submit(0, "sleep")
assert " -l mem=10gb" in Path("captured_qsub_args").read_text(encoding="utf-8")
async def test_realization_memory():
driver = OpenPBSDriver()
await driver.submit(0, "sleep", realization_memory=1024**2)
assert " -l mem=1gb" in Path("captured_qsub_args").read_text(encoding="utf-8")


@pytest.mark.usefixtures("capturing_qsub")
async def test_no_default_memory_per_job():
async def test_no_default_realization_memory():
driver = OpenPBSDriver()
await driver.submit(0, "sleep")
assert " -l " not in Path("captured_qsub_args").read_text(encoding="utf-8")


@pytest.mark.usefixtures("capturing_qsub")
async def test_no_validation_of_memory_per_job():
async def test_no_validation_of_realization_memory():
# Validation will happen during config parsing
driver = OpenPBSDriver(memory_per_job="a_lot")
await driver.submit(0, "sleep")
driver = OpenPBSDriver()
await driver.submit(0, "sleep", realization_memory="a_lot")
assert " -l mem=a_lot" in Path("captured_qsub_args").read_text(encoding="utf-8")


@@ -263,16 +263,17 @@

@given(st.integers(min_value=0), st.integers(min_value=1), words)
@pytest.mark.usefixtures("capturing_qsub")
async def test_full_resource_string(memory_per_job, num_cpu, cluster_label):
async def test_full_resource_string(realization_memory, num_cpu, cluster_label):
driver = OpenPBSDriver(
memory_per_job=memory_per_job if memory_per_job else None,
cluster_label=cluster_label if cluster_label else None,
)
await driver.submit(0, "sleep", num_cpu=num_cpu)
await driver.submit(
0, "sleep", num_cpu=num_cpu, realization_memory=realization_memory
)
resources = parse_resource_string(
Path("captured_qsub_args").read_text(encoding="utf-8")
)
assert resources.get("mem", "") == memory_per_job
assert resources.get("mem", "") == realization_memory
assert resources.get("select", "1") == "1"
assert resources.get("ncpus", "1") == str(num_cpu)

Expand All @@ -283,7 +284,7 @@ async def test_full_resource_string(memory_per_job, num_cpu, cluster_label):

assert len(resources) == sum(
[
bool(memory_per_job),
bool(realization_memory),
num_cpu > 1,
bool(cluster_label),
]
@@ -300,13 +301,6 @@ async def test_submit_with_realization_memory():
assert resources.get("mem", "") == "1mb"


@pytest.mark.usefixtures("capturing_qsub")
async def test_submit_with_realization_memory_and_memory_per_job():
driver = OpenPBSDriver(memory_per_job="1")
with pytest.raises(ValueError, match="Overspecified memory"):
await driver.submit(0, "sleep", realization_memory=1)


@pytest.mark.parametrize(
("exit_code, error_msg"),
[
3 changes: 0 additions & 3 deletions tests/ert/unit_tests/scheduler/test_scheduler.py
Expand Up @@ -653,7 +653,6 @@ def test_scheduler_create_lsf_driver():
def test_scheduler_create_openpbs_driver():
queue_name = "foo_queue"
keep_qsub_output = "True"
memory_per_job = "13gb"
cluster_label = "bar_cluster_label"
job_prefix = "foo_job_prefix"
qsub_cmd = "bar_qsub_cmd"
Expand All @@ -665,7 +664,6 @@ def test_scheduler_create_openpbs_driver():
"QUEUE_OPTION": [
("TORQUE", "QUEUE", queue_name),
("TORQUE", "KEEP_QSUB_OUTPUT", keep_qsub_output),
("TORQUE", "MEMORY_PER_JOB", memory_per_job),
("TORQUE", "CLUSTER_LABEL", cluster_label),
("TORQUE", "JOB_PREFIX", job_prefix),
("TORQUE", "QSUB_CMD", qsub_cmd),
Expand All @@ -678,7 +676,6 @@ def test_scheduler_create_openpbs_driver():
assert isinstance(driver, OpenPBSDriver)
assert driver._queue_name == queue_name
assert driver._keep_qsub_output == (keep_qsub_output == "True")
assert driver._memory_per_job == memory_per_job
assert driver._cluster_label == cluster_label
assert driver._job_prefix == job_prefix
assert str(driver._qsub_cmd) == qsub_cmd
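Taken together with the driver changes above, per-realization memory now
travels with each ``submit()`` call (in bytes) rather than being a driver
constructor option. A hedged usage sketch, inside a coroutine and with
values echoing this test:

    driver = OpenPBSDriver(queue_name="foo_queue", cluster_label="bar_cluster_label")
    await driver.submit(0, "sleep", num_cpu=4, realization_memory=8 * 1024**3)
    # expected qsub resource request: ncpus=4 and mem=8192mb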
