Commit

Remove deprecated torque option MEMORY_PER_JOB
jonathan-eq committed Dec 17, 2024
1 parent 726e50f commit 51cd724
Showing 12 changed files with 17 additions and 176 deletions.
4 changes: 2 additions & 2 deletions docs/ert/reference/configuration/keywords.rst
@@ -1825,8 +1825,8 @@ in :ref:`queue-system-chapter`. In brief, the queue systems have the following options:
``BHIST_CMD``, ``SUBMIT_SLEEP``, ``PROJECT_CODE``, ``EXCLUDE_HOST``,
``MAX_RUNNING``
* :ref:`TORQUE <pbs-systems>` — ``QSUB_CMD``, ``QSTAT_CMD``, ``QDEL_CMD``,
``QUEUE``, ``CLUSTER_LABEL``, ``MAX_RUNNING``,
``MEMORY_PER_JOB``, ``KEEP_QSUB_OUTPUT``, ``SUBMIT_SLEEP``
``QUEUE``, ``CLUSTER_LABEL``, ``MAX_RUNNING``, ``KEEP_QSUB_OUTPUT``,
``SUBMIT_SLEEP``
* :ref:`SLURM <slurm-systems>` — ``SBATCH``, ``SCANCEL``, ``SCONTROL``, ``SACCT``,
``SQUEUE``, ``PARTITION``, ``SQUEUE_TIMEOUT``, ``MAX_RUNTIME``, ``INCLUDE_HOST``,
``EXCLUDE_HOST``, ``MAX_RUNNING``
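With ``MEMORY_PER_JOB`` gone from the TORQUE list above, per-job memory
is requested through the general ``REALIZATION_MEMORY`` keyword instead.
A minimal migration sketch, assuming a TORQUE setup (the sizes and
``MAX_RUNNING`` value are placeholders)::

    NUM_REALIZATIONS 10
    QUEUE_SYSTEM TORQUE
    QUEUE_OPTION TORQUE MAX_RUNNING 50
    REALIZATION_MEMORY 16Gb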
21 changes: 0 additions & 21 deletions docs/ert/reference/configuration/queue.rst
@@ -277,27 +277,6 @@ The following is a list of all queue-specific configuration options:

If ``n`` is zero (the default), then it is set to the number of realizations.

.. _torque_memory_per_job:
.. topic:: MEMORY_PER_JOB

You can specify the amount of memory you will need for running your
job. This ensures that not too many jobs run on a single shared-memory
node at once, which could otherwise crash the compute node by
exhausting its memory.

You can get an indication of the memory requirement by monitoring a
local run with the ``htop`` utility. Whether you should set the peak
memory usage as your requirement, or a lower figure, depends on how
many jobs are likely to hit their peak memory usage simultaneously.

The value supplied is passed as a string straight into the ``qsub``
memory argument. You must specify the unit, either ``gb`` or ``mb``, as in
the example::

QUEUE_OPTION TORQUE MEMORY_PER_JOB 16gb

By default, this value is not set.
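Both the removed option and its replacement end up as a ``qsub``
``-l mem=...`` resource request. A sketch of the correspondence, assuming
the byte-to-megabyte conversion visible in the driver hunk further down
(``realization_memory // 1024**2``)::

    QUEUE_OPTION TORQUE MEMORY_PER_JOB 16gb    ->  qsub ... -l mem=16gb
    REALIZATION_MEMORY 16Gb                    ->  qsub ... -l mem=16384mb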

.. _torque_keep_qsub_output:
.. topic:: KEEP_QSUB_OUTPUT

9 changes: 0 additions & 9 deletions docs/everest/config_generated.rst
@@ -1043,15 +1043,6 @@ Simulation settings
The name of the cluster you are running simulations in.


**memory_per_job (optional)**
Type: *Optional[str]*

You can specify the amount of memory you will need for running your job. This ensures that not too many jobs run on a single shared-memory node at once, which could otherwise crash the compute node by exhausting its memory.
You can get an indication of the memory requirement by monitoring a local run with the htop utility. Whether you should set the peak memory usage as your requirement, or a lower figure, depends on how many jobs are likely to hit their peak memory usage simultaneously.
The value supplied is passed as a string straight into the qsub argument. You must specify the unit, either gb or mb.



**keep_qsub_output (optional)**
Type: *Optional[int]*

6 changes: 0 additions & 6 deletions src/ert/config/parsing/config_schema_deprecations.py
@@ -181,12 +181,6 @@
"for the Ensemble Smoother update algorithm. "
"Please use ENKF_ALPHA and STD_CUTOFF keywords instead.",
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="MEMORY_PER_JOB as QUEUE_OPTION to TORQUE is deprecated and will be removed in "
"the future. Replace by REALIZATION_MEMORY.",
check=lambda line: "MEMORY_PER_JOB" in line,
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="Memory requirements in LSF should now be set using REALIZATION_MEMORY and not"
19 changes: 0 additions & 19 deletions src/ert/config/queue_config.py
@@ -126,7 +126,6 @@ class TorqueQueueOptions(QueueOptions):
qstat_cmd: NonEmptyString | None = None
qdel_cmd: NonEmptyString | None = None
queue: NonEmptyString | None = None
memory_per_job: NonEmptyString | None = None
cluster_label: NonEmptyString | None = None
job_prefix: NonEmptyString | None = None
keep_qsub_output: bool = False
@@ -140,13 +139,6 @@ def driver_options(self) -> dict[str, Any]:
driver_dict.pop("submit_sleep")
return driver_dict

@pydantic.field_validator("memory_per_job")
@classmethod
def check_memory_per_job(cls, value: str | None) -> str | None:
if not torque_memory_usage_format.validate(value):
raise ValueError("wrong memory format")
return value


@pydantic.dataclasses.dataclass
class SlurmQueueOptions(QueueOptions):
@@ -315,17 +307,6 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
if tags:
queue_options.project_code = "+".join(tags)

for _queue_vals in all_validated_queue_options.values():
if (
isinstance(_queue_vals, TorqueQueueOptions)
and _queue_vals.memory_per_job
and realization_memory
):
_throw_error_or_warning(
"Do not specify both REALIZATION_MEMORY and TORQUE option MEMORY_PER_JOB",
"MEMORY_PER_JOB",
selected_queue_system == QueueSystem.TORQUE,
)

return QueueConfig(
job_script,
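The deleted validator relied on ``torque_memory_usage_format``, which is
not shown in this diff. A hedged reconstruction of what it accepted,
inferred from the deleted tests further down ("5gb", "5mb" and "5kb"
pass; "5", "5g", "gb", "mb" and "1 gb" fail); the exact suffix set in
ert may have been wider:

    import re

    # digits followed immediately by a unit suffix, no spaces
    TORQUE_MEMORY_RE = re.compile(r"[0-9]+(kb|mb|gb)")

    def validate_torque_memory(value: str | None) -> bool:
        return value is None or TORQUE_MEMORY_RE.fullmatch(value) is not None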
9 changes: 0 additions & 9 deletions src/ert/scheduler/openpbs_driver.py
@@ -124,7 +124,6 @@ def __init__(
queue_name: str | None = None,
project_code: str | None = None,
keep_qsub_output: bool | None = None,
memory_per_job: str | None = None,
cluster_label: str | None = None,
job_prefix: str | None = None,
qsub_cmd: str | None = None,
@@ -137,7 +136,6 @@
self._queue_name = queue_name
self._project_code = project_code
self._keep_qsub_output = keep_qsub_output
self._memory_per_job = memory_per_job
self._cluster_label: str | None = cluster_label
self._job_prefix = job_prefix
self._max_pbs_cmd_attempts = 10
@@ -162,13 +160,6 @@ def _build_resource_string(
cpu_resources: list[str] = []
if num_cpu > 1:
cpu_resources += [f"ncpus={num_cpu}"]
if self._memory_per_job is not None and realization_memory > 0:
raise ValueError(
"Overspecified memory pr job. "
"Do not specify both memory_per_job and realization_memory"
)
if self._memory_per_job is not None:
cpu_resources += [f"mem={self._memory_per_job}"]
elif realization_memory > 0:
cpu_resources += [f"mem={realization_memory // 1024**2 }mb"]
if cpu_resources:
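A minimal sketch of the surviving memory path, using only what the hunk
above shows: ``realization_memory`` arrives in bytes and is floor-divided
into whole megabytes for ``qsub`` (how the pieces are joined into the
final ``-l`` argument is abbreviated here):

    def memory_resource(realization_memory: int) -> list[str]:
        # bytes -> whole megabytes, emitted only for a positive request
        if realization_memory > 0:
            return [f"mem={realization_memory // 1024**2}mb"]
        return []

    assert memory_resource(16 * 1024**3) == ["mem=16384mb"]  # 16 GiB
    assert memory_resource(1024**2) == ["mem=1mb"]
    assert memory_resource(0) == []  # no mem request at all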
7 changes: 0 additions & 7 deletions src/everest/config/simulator_config.py
@@ -123,13 +123,6 @@ class SimulatorConfig(BaseModel, HasErtQueueOptions, extra="forbid"): # type: ignore
default=None,
description="The name of the cluster you are running simulations in.",
)
memory_per_job: str | None = Field(
default=None,
description="""You can specify the amount of memory you will need for running your job. This will ensure that not too many jobs will run on a single shared memory node at once, possibly crashing the compute node if it runs out of memory.
You can get an indication of the memory requirement by watching the course of a local run using the htop utility. Whether you should set the peak memory usage as your requirement or a lower figure depends on how simultaneously each job will run.
The option to be supplied will be used as a string in the qsub argument. You must specify the unit, either gb or mb.
""",
)
keep_qsub_output: int | None = Field(
default=0,
description="Set to 1 to keep error messages from qsub. Usually only to be used if somethign is seriously wrong with the queue environment/setup.",
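Because ``SimulatorConfig`` is declared with ``extra="forbid"``, dropping
the field turns any leftover ``memory_per_job`` key into a validation
error rather than a silently ignored option. A sketch of the effect
(import path as in this diff):

    from everest.config.simulator_config import SimulatorConfig

    SimulatorConfig(keep_qsub_output=1)      # still accepted
    SimulatorConfig(memory_per_job="16gb")   # now raises pydantic.ValidationError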
1 change: 0 additions & 1 deletion src/everest/config_keys.py
@@ -123,7 +123,6 @@ class ConfigKeys:
TORQUE_QDEL_CMD = "qdel_cmd"
TORQUE_QUEUE_NAME = "name"
TORQUE_CLUSTER_LABEL = "cluster_label"
TORQUE_MEMORY_PER_JOB = "memory_per_job"
TORQUE_KEEP_QSUB_OUTPUT = "keep_qsub_output"
TORQUE_SUBMIT_SLEEP = "submit_sleep"
TORQUE_PROJECT_CODE = "project_code"
4 changes: 1 addition & 3 deletions src/everest/queue_driver/queue_driver.py
@@ -32,9 +32,7 @@
(ConfigKeys.TORQUE_QDEL_CMD, "QDEL_CMD"),
(ConfigKeys.TORQUE_QUEUE_NAME, "QUEUE"),
(ConfigKeys.TORQUE_CLUSTER_LABEL, "CLUSTER_LABEL"),
(ConfigKeys.CORES_PER_NODE, "NUM_CPU")(
ConfigKeys.TORQUE_MEMORY_PER_JOB, "MEMORY_PER_JOB"
),
(ConfigKeys.CORES_PER_NODE, "NUM_CPU"),
(ConfigKeys.TORQUE_KEEP_QSUB_OUTPUT, "KEEP_QSUB_OUTPUT"),
(ConfigKeys.TORQUE_SUBMIT_SLEEP, "SUBMIT_SLEEP"),
(ConfigKeys.TORQUE_PROJECT_CODE, "PROJECT_CODE"),
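As rendered, the three removed lines form a single expression that
"calls" a tuple: a missing trailing comma had fused two ``(key, option)``
pairs into ``(...)(...)``, which would raise ``TypeError`` if evaluated.
The surviving single-entry line also repairs that formatting accident.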
76 changes: 0 additions & 76 deletions tests/ert/unit_tests/config/test_queue_config.py
@@ -154,44 +154,6 @@ def test_invalid_realization_memory(invalid_memory_spec: str):
)


def test_conflicting_realization_openpbs_memory_per_job():
with (
pytest.raises(ConfigValidationError),
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"REALIZATION_MEMORY 10Mb\n"
"QUEUE_SYSTEM TORQUE\n"
"QUEUE_OPTION TORQUE MEMORY_PER_JOB 20mb\n"
)


def test_conflicting_realization_openpbs_memory_per_job_but_slurm_activated_only_warns():
with pytest.warns(ConfigWarning):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"REALIZATION_MEMORY 10Mb\n"
"QUEUE_SYSTEM SLURM\n"
"QUEUE_OPTION TORQUE MEMORY_PER_JOB 20mb\n"
)


@pytest.mark.parametrize("torque_memory_with_unit_str", ["gb", "mb", "1 gb"])
def test_that_invalid_memory_pr_job_raises_validation_error(
torque_memory_with_unit_str,
):
with (
pytest.raises(ConfigValidationError),
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM TORQUE\n"
f"QUEUE_OPTION TORQUE MEMORY_PER_JOB {torque_memory_with_unit_str}"
)


@pytest.mark.parametrize(
"queue_system, queue_system_option",
[("LSF", "LSF_QUEUE"), ("SLURM", "SQUEUE"), ("TORQUE", "QUEUE")],
@@ -271,44 +233,6 @@ def test_that_configuring_another_queue_system_gives_warning():
)


@pytest.mark.parametrize(
"mem_per_job",
["5gb", "5mb", "5kb"],
)
def test_that_valid_torque_queue_mem_options_are_ok(mem_per_job):
with pytest.warns(ConfigWarning, match="deprecated"):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM SLURM\n"
f"QUEUE_OPTION TORQUE MEMORY_PER_JOB {mem_per_job}\n"
)


@pytest.mark.parametrize(
"mem_per_job",
["5", "5g"],
)
def test_that_torque_queue_mem_options_are_corrected(mem_per_job: str):
with (
pytest.raises(ConfigValidationError) as e,
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM TORQUE\n"
f"QUEUE_OPTION TORQUE MEMORY_PER_JOB {mem_per_job}\n"
)

info = e.value.errors[0]

assert (
f"Value error, wrong memory format. Got input '{mem_per_job}'." in info.message
)
assert info.line == 3
assert info.column == 36
assert info.end_column == info.column + len(mem_per_job)


def test_max_running_property():
config = ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
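The deleted TORQUE memory tests have no direct replacement in this file;
memory specifications are now exercised through ``REALIZATION_MEMORY``
(see ``test_invalid_realization_memory`` in the hunk context above). A
hedged sketch of the surviving configuration path, reusing the
file-contents style of the deleted tests:

    ErtConfig.from_file_contents(
        "NUM_REALIZATIONS 1\n"
        "QUEUE_SYSTEM TORQUE\n"
        "REALIZATION_MEMORY 16Gb\n"
    )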
34 changes: 14 additions & 20 deletions tests/ert/unit_tests/scheduler/test_openpbs_driver.py
@@ -137,24 +137,24 @@ def parse_resource_string(qsub_args: str) -> dict[str, str]:


@pytest.mark.usefixtures("capturing_qsub")
async def test_memory_per_job():
driver = OpenPBSDriver(memory_per_job="10gb")
await driver.submit(0, "sleep")
assert " -l mem=10gb" in Path("captured_qsub_args").read_text(encoding="utf-8")
async def test_realization_memory():
driver = OpenPBSDriver()
await driver.submit(0, "sleep", realization_memory=1024**2)
assert " -l mem=1gb" in Path("captured_qsub_args").read_text(encoding="utf-8")


@pytest.mark.usefixtures("capturing_qsub")
async def test_no_default_memory_per_job():
async def test_no_default_realization_memory():
driver = OpenPBSDriver()
await driver.submit(0, "sleep")
assert " -l " not in Path("captured_qsub_args").read_text(encoding="utf-8")


@pytest.mark.usefixtures("capturing_qsub")
async def test_no_validation_of_memory_per_job():
async def test_no_validation_of_realization_memory():
# Validation will happen during config parsing
driver = OpenPBSDriver(memory_per_job="a_lot")
await driver.submit(0, "sleep")
driver = OpenPBSDriver()
await driver.submit(0, "sleep", realization_memory="a_lot")
assert " -l mem=a_lot" in Path("captured_qsub_args").read_text(encoding="utf-8")


@@ -263,16 +263,17 @@

@given(st.integers(min_value=0), st.integers(min_value=1), words)
@pytest.mark.usefixtures("capturing_qsub")
async def test_full_resource_string(memory_per_job, num_cpu, cluster_label):
async def test_full_resource_string(realization_memory, num_cpu, cluster_label):
driver = OpenPBSDriver(
memory_per_job=memory_per_job if memory_per_job else None,
cluster_label=cluster_label if cluster_label else None,
)
await driver.submit(0, "sleep", num_cpu=num_cpu)
await driver.submit(
0, "sleep", num_cpu=num_cpu, realization_memory=realization_memory
)
resources = parse_resource_string(
Path("captured_qsub_args").read_text(encoding="utf-8")
)
assert resources.get("mem", "") == memory_per_job
assert resources.get("mem", "") == realization_memory
assert resources.get("select", "1") == "1"
assert resources.get("ncpus", "1") == str(num_cpu)

Expand All @@ -283,7 +284,7 @@ async def test_full_resource_string(memory_per_job, num_cpu, cluster_label):

assert len(resources) == sum(
[
bool(memory_per_job),
bool(realization_memory),
num_cpu > 1,
bool(cluster_label),
]
@@ -300,13 +301,6 @@ async def test_submit_with_realization_memory():
assert resources.get("mem", "") == "1mb"


@pytest.mark.usefixtures("capturing_qsub")
async def test_submit_with_realization_memory_and_memory_per_job():
driver = OpenPBSDriver(memory_per_job="1")
with pytest.raises(ValueError, match="Overspecified memory"):
await driver.submit(0, "sleep", realization_memory=1)


@pytest.mark.parametrize(
("exit_code, error_msg"),
[
3 changes: 0 additions & 3 deletions tests/ert/unit_tests/scheduler/test_scheduler.py
Expand Up @@ -653,7 +653,6 @@ def test_scheduler_create_lsf_driver():
def test_scheduler_create_openpbs_driver():
queue_name = "foo_queue"
keep_qsub_output = "True"
memory_per_job = "13gb"
cluster_label = "bar_cluster_label"
job_prefix = "foo_job_prefix"
qsub_cmd = "bar_qsub_cmd"
Expand All @@ -665,7 +664,6 @@ def test_scheduler_create_openpbs_driver():
"QUEUE_OPTION": [
("TORQUE", "QUEUE", queue_name),
("TORQUE", "KEEP_QSUB_OUTPUT", keep_qsub_output),
("TORQUE", "MEMORY_PER_JOB", memory_per_job),
("TORQUE", "CLUSTER_LABEL", cluster_label),
("TORQUE", "JOB_PREFIX", job_prefix),
("TORQUE", "QSUB_CMD", qsub_cmd),
Expand All @@ -678,7 +676,6 @@ def test_scheduler_create_openpbs_driver():
assert isinstance(driver, OpenPBSDriver)
assert driver._queue_name == queue_name
assert driver._keep_qsub_output == (keep_qsub_output == "True")
assert driver._memory_per_job == memory_per_job
assert driver._cluster_label == cluster_label
assert driver._job_prefix == job_prefix
assert str(driver._qsub_cmd) == qsub_cmd
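Taken together with the driver changes above, per-realization memory now
travels with each ``submit()`` call (in bytes) rather than being a driver
constructor option. A hedged usage sketch, inside a coroutine and with
values echoing this test:

    driver = OpenPBSDriver(queue_name="foo_queue", cluster_label="bar_cluster_label")
    await driver.submit(0, "sleep", num_cpu=4, realization_memory=8 * 1024**3)
    # expected qsub resource request: ncpus=4 and mem=8192mb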
