From 4bc6e0d12549bdd4a40fb84bdd301a389be43606 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 09:11:15 +0200 Subject: [PATCH 01/11] Move cli tests to ui_tests/cli --- .../analysis/test_es_update.py | 469 ------------------ .../scheduler/test_openpbs_driver.py | 2 +- .../analysis => ui_tests}/__init__.py | 0 .../shared => ui_tests/cli}/__init__.py | 0 .../cli/analysis}/__init__.py | 0 .../0/update_log | 0 .../analysis/test_adaptive_localization.py | 2 +- tests/ui_tests/cli/analysis/test_es_update.py | 207 ++++++++ .../cli}/run_cli.py | 0 .../test_es_mda/es_mda_integration_snapshot | 0 .../es_mda_integration_snapshot | 0 .../es_mda_integration_snapshot | 0 .../cli}/test_cli.py | 0 .../cli}/test_field_parameter.py | 3 +- .../cli}/test_observation_times.py | 0 .../cli}/test_parameter_example.py | 0 .../cli}/test_parameter_sample_types.py | 3 +- .../share => ui_tests/cli}/test_shell.py | 3 +- .../storage => ui_tests/gui}/__init__.py | 0 tests/unit_tests/analysis/test_es_update.py | 226 +++++++++ .../gui/tools/test_manage_experiments_tool.py | 2 +- 21 files changed, 442 insertions(+), 475 deletions(-) delete mode 100644 tests/integration_tests/analysis/test_es_update.py rename tests/{integration_tests/analysis => ui_tests}/__init__.py (100%) rename tests/{integration_tests/shared => ui_tests/cli}/__init__.py (100%) rename tests/{integration_tests/shared/share => ui_tests/cli/analysis}/__init__.py (100%) rename tests/{integration_tests => ui_tests/cli}/analysis/snapshots/test_es_update/test_update_only_using_subset_observations/0/update_log (100%) rename tests/{integration_tests => ui_tests/cli}/analysis/test_adaptive_localization.py (99%) create mode 100644 tests/ui_tests/cli/analysis/test_es_update.py rename tests/{integration_tests => ui_tests/cli}/run_cli.py (100%) rename tests/{integration_tests => ui_tests/cli}/snapshots/test_cli/test_es_mda/es_mda_integration_snapshot (100%) rename tests/{integration_tests => ui_tests/cli}/snapshots/test_cli/test_es_mda/using_job_queue/es_mda_integration_snapshot (100%) rename tests/{integration_tests => ui_tests/cli}/snapshots/test_cli/test_es_mda/using_scheduler/es_mda_integration_snapshot (100%) rename tests/{integration_tests => ui_tests/cli}/test_cli.py (100%) rename tests/{integration_tests/storage => ui_tests/cli}/test_field_parameter.py (99%) rename tests/{integration_tests => ui_tests/cli}/test_observation_times.py (100%) rename tests/{integration_tests => ui_tests/cli}/test_parameter_example.py (100%) rename tests/{integration_tests/storage => ui_tests/cli}/test_parameter_sample_types.py (99%) rename tests/{integration_tests/shared/share => ui_tests/cli}/test_shell.py (97%) rename tests/{integration_tests/storage => ui_tests/gui}/__init__.py (100%) diff --git a/tests/integration_tests/analysis/test_es_update.py b/tests/integration_tests/analysis/test_es_update.py deleted file mode 100644 index faadc9009bf..00000000000 --- a/tests/integration_tests/analysis/test_es_update.py +++ /dev/null @@ -1,469 +0,0 @@ -import os -import stat -from pathlib import Path -from textwrap import dedent - -import numpy as np -import pytest -import xarray as xr -from scipy.ndimage import gaussian_filter -from xtgeo import RegularSurface, surface_from_file - -from ert import LibresFacade -from ert.analysis import ErtAnalysisError, ObservationStatus, smoother_update -from ert.analysis._es_update import _all_parameters -from ert.analysis.event import AnalysisCompleteEvent -from ert.config import ErtConfig, GenDataConfig, GenKwConfig -from 
ert.config.analysis_config import UpdateSettings -from ert.config.analysis_module import ESSettings -from ert.config.gen_kw_config import TransformFunctionDefinition -from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE -from ert.storage import open_storage -from ert.storage.realization_storage_state import RealizationStorageState -from tests.integration_tests.run_cli import run_cli - - -@pytest.fixture -def uniform_parameter(): - return GenKwConfig( - name="PARAMETER", - forward_init=False, - template_file="", - transform_function_definitions=[ - TransformFunctionDefinition("KEY1", "UNIFORM", [0, 1]), - ], - output_file="kw.txt", - update=True, - ) - - -@pytest.fixture -def obs(): - return xr.Dataset( - { - "observations": (["report_step", "index"], [[1.0, 1.0, 1.0]]), - "std": (["report_step", "index"], [[0.1, 1.0, 10.0]]), - }, - coords={"index": [0, 1, 2], "report_step": [0]}, - attrs={"response": "RESPONSE"}, - ) - - -@pytest.mark.integration_test -@pytest.mark.usefixtures("copy_poly_case") -def test_that_posterior_has_lower_variance_than_prior(): - run_cli( - ENSEMBLE_SMOOTHER_MODE, - "--disable-monitor", - "--realizations", - "1-50", - "poly.ert", - "--experiment-name", - "es-test", - ) - facade = LibresFacade.from_config_file("poly.ert") - with open_storage(facade.enspath) as storage: - experiment = storage.get_experiment_by_name("es-test") - prior_ensemble = experiment.get_ensemble_by_name("iter-0") - df_default = prior_ensemble.load_all_gen_kw_data() - posterior_ensemble = experiment.get_ensemble_by_name("iter-1") - df_target = posterior_ensemble.load_all_gen_kw_data() - - # The std for the ensemble should decrease - assert float( - prior_ensemble.calculate_std_dev_for_parameter("COEFFS")["values"].sum() - ) > float( - posterior_ensemble.calculate_std_dev_for_parameter("COEFFS")["values"].sum() - ) - - # We expect that ERT's update step lowers the - # generalized variance for the parameters. - assert ( - 0 - < np.linalg.det(df_target.cov().to_numpy()) - < np.linalg.det(df_default.cov().to_numpy()) - ) - - -@pytest.mark.integration_test -@pytest.mark.usefixtures("copy_snake_oil_field") -def test_that_surfaces_retain_their_order_when_loaded_and_saved_by_ert(): - """This is a regression test to make sure ert does not use the wrong order - (row-major / column-major) when working with surfaces. - """ - rng = np.random.default_rng() - - def sample_prior(nx, ny): - return np.exp( - 5 - * gaussian_filter( - gaussian_filter(rng.random(size=(nx, ny)), sigma=2.0), sigma=1.0 - ) - ) - - nx = 5 - ny = 7 - ensemble_size = 2 - - Path("./surface").mkdir() - for i in range(ensemble_size): - surf = RegularSurface( - ncol=nx, nrow=ny, xinc=1.0, yinc=1.0, values=sample_prior(nx, ny) - ) - surf.to_file(f"surface/surf_init_{i}.irap", fformat="irap_ascii") - - # Single observation with a large ERROR to make sure the udpate is minimal. 
- obs = """ - SUMMARY_OBSERVATION WOPR_OP1_9 - { - VALUE = 0.1; - ERROR = 200.0; - DATE = 2010-03-31; - KEY = WOPR:OP1; - }; - """ - - with open("observations/observations.txt", "w", encoding="utf-8") as file: - file.write(obs) - - run_cli( - ENSEMBLE_SMOOTHER_MODE, - "--disable-monitor", - "snake_oil_surface.ert", - ) - - ert_config = ErtConfig.from_file("snake_oil_surface.ert") - - storage = open_storage(ert_config.ens_path) - experiment = storage.get_experiment_by_name("es") - ens_prior = experiment.get_ensemble_by_name("iter-0") - ens_posterior = experiment.get_ensemble_by_name("iter-1") - - # Check that surfaces defined in INIT_FILES are not changed by ERT - surf_prior = ens_prior.load_parameters("TOP", list(range(ensemble_size)))["values"] - for i in range(ensemble_size): - _prior_init = surface_from_file( - f"surface/surf_init_{i}.irap", fformat="irap_ascii", dtype=np.float32 - ) - np.testing.assert_array_equal(surf_prior[i], _prior_init.values.data) - - surf_posterior = ens_posterior.load_parameters("TOP", list(range(ensemble_size)))[ - "values" - ] - - assert surf_prior.shape == surf_posterior.shape - - for i in range(ensemble_size): - with pytest.raises(AssertionError): - np.testing.assert_array_equal(surf_prior[i], surf_posterior[i]) - np.testing.assert_almost_equal( - surf_prior[i].values, surf_posterior[i].values, decimal=2 - ) - - -@pytest.mark.integration_test -@pytest.mark.usefixtures("copy_snake_oil_field") -def test_update_multiple_param(): - run_cli( - ENSEMBLE_SMOOTHER_MODE, - "--disable-monitor", - "snake_oil.ert", - ) - - ert_config = ErtConfig.from_file("snake_oil.ert") - - storage = open_storage(ert_config.ens_path) - experiment = storage.get_experiment_by_name("es") - prior_ensemble = experiment.get_ensemble_by_name("iter-0") - posterior_ensemble = experiment.get_ensemble_by_name("iter-1") - - prior_array = _all_parameters(prior_ensemble, list(range(10))) - posterior_array = _all_parameters(posterior_ensemble, list(range(10))) - - # We expect that ERT's update step lowers the - # generalized variance for the parameters. 
- # https://en.wikipedia.org/wiki/Variance#For_vector-valued_random_variables - assert np.trace(np.cov(posterior_array)) < np.trace(np.cov(prior_array)) - - -@pytest.mark.integration_test -def test_gen_data_obs_data_mismatch(storage, uniform_parameter): - resp = GenDataConfig(keys=["RESPONSE"]) - obs = xr.Dataset( - { - "observations": (["report_step", "index"], [[1.0]]), - "std": (["report_step", "index"], [[0.1]]), - }, - coords={"index": [1000], "report_step": [0]}, - attrs={"response": "RESPONSE"}, - ) - experiment = storage.create_experiment( - parameters=[uniform_parameter], - responses=[resp], - observations={"OBSERVATION": obs}, - ) - prior = storage.create_ensemble( - experiment, - ensemble_size=10, - iteration=0, - name="prior", - ) - rng = np.random.default_rng(1234) - for iens in range(prior.ensemble_size): - data = rng.uniform(0, 1) - prior.save_parameters( - "PARAMETER", - iens, - xr.Dataset( - { - "values": ("names", [data]), - "transformed_values": ("names", [data]), - "names": ["KEY_1"], - } - ), - ) - data = rng.uniform(0.8, 1, 3) - prior.save_response( - "gen_data", - xr.Dataset( - {"values": (["name", "report_step", "index"], [[data]])}, - coords={ - "name": ["RESPONSE"], - "index": range(len(data)), - "report_step": [0], - }, - ), - iens, - ) - posterior_ens = storage.create_ensemble( - prior.experiment_id, - ensemble_size=prior.ensemble_size, - iteration=1, - name="posterior", - prior_ensemble=prior, - ) - with pytest.raises( - ErtAnalysisError, - match="No active observations", - ): - smoother_update( - prior, - posterior_ens, - ["OBSERVATION"], - ["PARAMETER"], - UpdateSettings(), - ESSettings(), - ) - - -@pytest.mark.usefixtures("use_tmpdir") -@pytest.mark.integration_test -def test_gen_data_missing(storage, uniform_parameter, obs): - resp = GenDataConfig(keys=["RESPONSE"]) - experiment = storage.create_experiment( - parameters=[uniform_parameter], - responses=[resp], - observations={"OBSERVATION": obs}, - ) - prior = storage.create_ensemble( - experiment, - ensemble_size=10, - iteration=0, - name="prior", - ) - rng = np.random.default_rng(1234) - for iens in range(prior.ensemble_size): - data = rng.uniform(0, 1) - prior.save_parameters( - "PARAMETER", - iens, - xr.Dataset( - { - "values": ("names", [data]), - "transformed_values": ("names", [data]), - "names": ["KEY_1"], - } - ), - ) - data = rng.uniform(0.8, 1, 2) # Importantly, shorter than obs - prior.save_response( - "gen_data", - xr.Dataset( - {"values": (["name", "report_step", "index"], [[data]])}, - coords={ - "name": ["RESPONSE"], - "index": range(len(data)), - "report_step": [0], - }, - ), - iens, - ) - posterior_ens = storage.create_ensemble( - prior.experiment_id, - ensemble_size=prior.ensemble_size, - iteration=1, - name="posterior", - prior_ensemble=prior, - ) - events = [] - - update_snapshot = smoother_update( - prior, - posterior_ens, - ["OBSERVATION"], - ["PARAMETER"], - UpdateSettings(), - ESSettings(), - progress_callback=events.append, - ) - assert [step.status for step in update_snapshot.update_step_snapshots] == [ - ObservationStatus.ACTIVE, - ObservationStatus.ACTIVE, - ObservationStatus.MISSING_RESPONSE, - ] - - update_event = next(e for e in events if isinstance(e, AnalysisCompleteEvent)) - data_section = update_event.data - assert data_section.extra["Active observations"] == "2" - assert data_section.extra["Deactivated observations - missing respons(es)"] == "1" - - -@pytest.mark.usefixtures("use_tmpdir") -@pytest.mark.integration_test -def test_update_subset_parameters(storage, 
uniform_parameter, obs): - no_update_param = GenKwConfig( - name="EXTRA_PARAMETER", - forward_init=False, - template_file="", - transform_function_definitions=[ - TransformFunctionDefinition("KEY1", "UNIFORM", [0, 1]), - ], - output_file=None, - update=False, - ) - resp = GenDataConfig(keys=["RESPONSE"]) - experiment = storage.create_experiment( - parameters=[uniform_parameter, no_update_param], - responses=[resp], - observations={"OBSERVATION": obs}, - ) - prior = storage.create_ensemble( - experiment, - ensemble_size=10, - iteration=0, - name="prior", - ) - rng = np.random.default_rng(1234) - for iens in range(prior.ensemble_size): - data = rng.uniform(0, 1) - prior.save_parameters( - "PARAMETER", - iens, - xr.Dataset( - { - "values": ("names", [data]), - "transformed_values": ("names", [data]), - "names": ["KEY_1"], - } - ), - ) - prior.save_parameters( - "EXTRA_PARAMETER", - iens, - xr.Dataset( - { - "values": ("names", [data]), - "transformed_values": ("names", [data]), - "names": ["KEY_1"], - } - ), - ) - - data = rng.uniform(0.8, 1, 10) - prior.save_response( - "gen_data", - xr.Dataset( - {"values": (["name", "report_step", "index"], [[data]])}, - coords={ - "name": ["RESPONSE"], - "index": range(len(data)), - "report_step": [0], - }, - ), - iens, - ) - posterior_ens = storage.create_ensemble( - prior.experiment_id, - ensemble_size=prior.ensemble_size, - iteration=1, - name="posterior", - prior_ensemble=prior, - ) - smoother_update( - prior, - posterior_ens, - ["OBSERVATION"], - ["PARAMETER"], - UpdateSettings(), - ESSettings(), - ) - assert prior.load_parameters("EXTRA_PARAMETER", 0)["values"].equals( - posterior_ens.load_parameters("EXTRA_PARAMETER", 0)["values"] - ) - assert not prior.load_parameters("PARAMETER", 0)["values"].equals( - posterior_ens.load_parameters("PARAMETER", 0)["values"] - ) - - -@pytest.mark.usefixtures("copy_poly_case") -def test_that_update_works_with_failed_realizations(): - with open("poly_eval.py", "w", encoding="utf-8") as f: - f.write( - dedent( - """\ - #!/usr/bin/env python - import numpy as np - import sys - import json - - def _load_coeffs(filename): - with open(filename, encoding="utf-8") as f: - return json.load(f)["COEFFS"] - - def _evaluate(coeffs, x): - return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"] - - if __name__ == "__main__": - if np.random.random(1) > 0.5: - sys.exit(1) - coeffs = _load_coeffs("parameters.json") - output = [_evaluate(coeffs, x) for x in range(10)] - with open("poly.out", "w", encoding="utf-8") as f: - f.write("\\n".join(map(str, output))) - """ - ) - ) - os.chmod( - "poly_eval.py", - os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH, - ) - - run_cli( - ENSEMBLE_SMOOTHER_MODE, - "--disable-monitor", - "poly.ert", - ) - - ert_config = ErtConfig.from_file("poly.ert") - - with open_storage(ert_config.ens_path) as storage: - experiment = storage.get_experiment_by_name("es") - prior = experiment.get_ensemble_by_name("iter-0") - posterior = experiment.get_ensemble_by_name("iter-1") - - assert all( - posterior.get_ensemble_state()[idx] - == RealizationStorageState.PARENT_FAILURE - for idx, v in enumerate(prior.get_ensemble_state()) - if v == RealizationStorageState.LOAD_FAILURE - ) diff --git a/tests/integration_tests/scheduler/test_openpbs_driver.py b/tests/integration_tests/scheduler/test_openpbs_driver.py index 1926472cfa5..a6f987b4d75 100644 --- a/tests/integration_tests/scheduler/test_openpbs_driver.py +++ b/tests/integration_tests/scheduler/test_openpbs_driver.py @@ -6,7 +6,7 @@ from 
ert.cli.main import ErtCliError from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE from ert.scheduler.openpbs_driver import OpenPBSDriver -from tests.integration_tests.run_cli import run_cli +from tests.ui_tests.cli.run_cli import run_cli from .conftest import mock_bin diff --git a/tests/integration_tests/analysis/__init__.py b/tests/ui_tests/__init__.py similarity index 100% rename from tests/integration_tests/analysis/__init__.py rename to tests/ui_tests/__init__.py diff --git a/tests/integration_tests/shared/__init__.py b/tests/ui_tests/cli/__init__.py similarity index 100% rename from tests/integration_tests/shared/__init__.py rename to tests/ui_tests/cli/__init__.py diff --git a/tests/integration_tests/shared/share/__init__.py b/tests/ui_tests/cli/analysis/__init__.py similarity index 100% rename from tests/integration_tests/shared/share/__init__.py rename to tests/ui_tests/cli/analysis/__init__.py diff --git a/tests/integration_tests/analysis/snapshots/test_es_update/test_update_only_using_subset_observations/0/update_log b/tests/ui_tests/cli/analysis/snapshots/test_es_update/test_update_only_using_subset_observations/0/update_log similarity index 100% rename from tests/integration_tests/analysis/snapshots/test_es_update/test_update_only_using_subset_observations/0/update_log rename to tests/ui_tests/cli/analysis/snapshots/test_es_update/test_update_only_using_subset_observations/0/update_log diff --git a/tests/integration_tests/analysis/test_adaptive_localization.py b/tests/ui_tests/cli/analysis/test_adaptive_localization.py similarity index 99% rename from tests/integration_tests/analysis/test_adaptive_localization.py rename to tests/ui_tests/cli/analysis/test_adaptive_localization.py index 45af7253e4c..70bfa515dd4 100644 --- a/tests/integration_tests/analysis/test_adaptive_localization.py +++ b/tests/ui_tests/cli/analysis/test_adaptive_localization.py @@ -9,7 +9,7 @@ from ert.storage import open_storage random_seed_line = "RANDOM_SEED 1234\n\n" -from tests.integration_tests.run_cli import run_cli +from tests.ui_tests.cli.run_cli import run_cli def run_cli_ES_with_case(poly_config): diff --git a/tests/ui_tests/cli/analysis/test_es_update.py b/tests/ui_tests/cli/analysis/test_es_update.py new file mode 100644 index 00000000000..fe583b2cf6c --- /dev/null +++ b/tests/ui_tests/cli/analysis/test_es_update.py @@ -0,0 +1,207 @@ +import os +import stat +from pathlib import Path +from textwrap import dedent + +import numpy as np +import pytest +from scipy.ndimage import gaussian_filter +from xtgeo import RegularSurface, surface_from_file + +from ert import LibresFacade +from ert.analysis._es_update import _all_parameters +from ert.config import ErtConfig +from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE +from ert.storage import open_storage +from ert.storage.realization_storage_state import RealizationStorageState +from tests.ui_tests.cli.run_cli import run_cli + + +@pytest.mark.usefixtures("copy_poly_case") +def test_that_posterior_has_lower_variance_than_prior(): + run_cli( + ENSEMBLE_SMOOTHER_MODE, + "--disable-monitor", + "--realizations", + "1-50", + "poly.ert", + "--experiment-name", + "es-test", + ) + facade = LibresFacade.from_config_file("poly.ert") + with open_storage(facade.enspath) as storage: + experiment = storage.get_experiment_by_name("es-test") + prior_ensemble = experiment.get_ensemble_by_name("iter-0") + df_default = prior_ensemble.load_all_gen_kw_data() + posterior_ensemble = experiment.get_ensemble_by_name("iter-1") + df_target = 
posterior_ensemble.load_all_gen_kw_data()
+
+        # The std for the ensemble should decrease
+        assert float(
+            prior_ensemble.calculate_std_dev_for_parameter("COEFFS")["values"].sum()
+        ) > float(
+            posterior_ensemble.calculate_std_dev_for_parameter("COEFFS")["values"].sum()
+        )
+
+        # We expect that ERT's update step lowers the
+        # generalized variance for the parameters.
+        assert (
+            0
+            < np.linalg.det(df_target.cov().to_numpy())
+            < np.linalg.det(df_default.cov().to_numpy())
+        )
+
+
+@pytest.mark.integration_test
+@pytest.mark.usefixtures("copy_snake_oil_field")
+def test_that_surfaces_retain_their_order_when_loaded_and_saved_by_ert():
+    """This is a regression test to make sure ert does not use the wrong order
+    (row-major / column-major) when working with surfaces.
+    """
+    rng = np.random.default_rng()
+
+    def sample_prior(nx, ny):
+        return np.exp(
+            5
+            * gaussian_filter(
+                gaussian_filter(rng.random(size=(nx, ny)), sigma=2.0), sigma=1.0
+            )
+        )
+
+    nx = 5
+    ny = 7
+    ensemble_size = 2
+
+    Path("./surface").mkdir()
+    for i in range(ensemble_size):
+        surf = RegularSurface(
+            ncol=nx, nrow=ny, xinc=1.0, yinc=1.0, values=sample_prior(nx, ny)
+        )
+        surf.to_file(f"surface/surf_init_{i}.irap", fformat="irap_ascii")
+
+    # Single observation with a large ERROR to make sure the update is minimal.
+    obs = """
+    SUMMARY_OBSERVATION WOPR_OP1_9
+    {
+        VALUE = 0.1;
+        ERROR = 200.0;
+        DATE = 2010-03-31;
+        KEY = WOPR:OP1;
+    };
+    """
+
+    with open("observations/observations.txt", "w", encoding="utf-8") as file:
+        file.write(obs)
+
+    run_cli(
+        ENSEMBLE_SMOOTHER_MODE,
+        "--disable-monitor",
+        "snake_oil_surface.ert",
+    )
+
+    ert_config = ErtConfig.from_file("snake_oil_surface.ert")
+
+    storage = open_storage(ert_config.ens_path)
+    experiment = storage.get_experiment_by_name("es")
+    ens_prior = experiment.get_ensemble_by_name("iter-0")
+    ens_posterior = experiment.get_ensemble_by_name("iter-1")
+
+    # Check that surfaces defined in INIT_FILES are not changed by ERT
+    surf_prior = ens_prior.load_parameters("TOP", list(range(ensemble_size)))["values"]
+    for i in range(ensemble_size):
+        _prior_init = surface_from_file(
+            f"surface/surf_init_{i}.irap", fformat="irap_ascii", dtype=np.float32
+        )
+        np.testing.assert_array_equal(surf_prior[i], _prior_init.values.data)
+
+    surf_posterior = ens_posterior.load_parameters("TOP", list(range(ensemble_size)))[
+        "values"
+    ]
+
+    assert surf_prior.shape == surf_posterior.shape
+
+    for i in range(ensemble_size):
+        with pytest.raises(AssertionError):
+            np.testing.assert_array_equal(surf_prior[i], surf_posterior[i])
+        np.testing.assert_almost_equal(
+            surf_prior[i].values, surf_posterior[i].values, decimal=2
+        )
+
+
+@pytest.mark.integration_test
+@pytest.mark.usefixtures("copy_snake_oil_field")
+def test_update_multiple_param():
+    run_cli(
+        ENSEMBLE_SMOOTHER_MODE,
+        "--disable-monitor",
+        "snake_oil.ert",
+    )
+
+    ert_config = ErtConfig.from_file("snake_oil.ert")
+
+    storage = open_storage(ert_config.ens_path)
+    experiment = storage.get_experiment_by_name("es")
+    prior_ensemble = experiment.get_ensemble_by_name("iter-0")
+    posterior_ensemble = experiment.get_ensemble_by_name("iter-1")
+
+    prior_array = _all_parameters(prior_ensemble, list(range(10)))
+    posterior_array = _all_parameters(posterior_ensemble, list(range(10)))
+
+    # We expect that ERT's update step lowers the
+    # generalized variance for the parameters.
+ # https://en.wikipedia.org/wiki/Variance#For_vector-valued_random_variables + assert np.trace(np.cov(posterior_array)) < np.trace(np.cov(prior_array)) + + +@pytest.mark.usefixtures("copy_poly_case") +def test_that_update_works_with_failed_realizations(): + with open("poly_eval.py", "w", encoding="utf-8") as f: + f.write( + dedent( + """\ + #!/usr/bin/env python + import numpy as np + import sys + import json + + def _load_coeffs(filename): + with open(filename, encoding="utf-8") as f: + return json.load(f)["COEFFS"] + + def _evaluate(coeffs, x): + return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"] + + if __name__ == "__main__": + if np.random.random(1) > 0.5: + sys.exit(1) + coeffs = _load_coeffs("parameters.json") + output = [_evaluate(coeffs, x) for x in range(10)] + with open("poly.out", "w", encoding="utf-8") as f: + f.write("\\n".join(map(str, output))) + """ + ) + ) + os.chmod( + "poly_eval.py", + os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH, + ) + + run_cli( + ENSEMBLE_SMOOTHER_MODE, + "--disable-monitor", + "poly.ert", + ) + + ert_config = ErtConfig.from_file("poly.ert") + + with open_storage(ert_config.ens_path) as storage: + experiment = storage.get_experiment_by_name("es") + prior = experiment.get_ensemble_by_name("iter-0") + posterior = experiment.get_ensemble_by_name("iter-1") + + assert all( + posterior.get_ensemble_state()[idx] + == RealizationStorageState.PARENT_FAILURE + for idx, v in enumerate(prior.get_ensemble_state()) + if v == RealizationStorageState.LOAD_FAILURE + ) diff --git a/tests/integration_tests/run_cli.py b/tests/ui_tests/cli/run_cli.py similarity index 100% rename from tests/integration_tests/run_cli.py rename to tests/ui_tests/cli/run_cli.py diff --git a/tests/integration_tests/snapshots/test_cli/test_es_mda/es_mda_integration_snapshot b/tests/ui_tests/cli/snapshots/test_cli/test_es_mda/es_mda_integration_snapshot similarity index 100% rename from tests/integration_tests/snapshots/test_cli/test_es_mda/es_mda_integration_snapshot rename to tests/ui_tests/cli/snapshots/test_cli/test_es_mda/es_mda_integration_snapshot diff --git a/tests/integration_tests/snapshots/test_cli/test_es_mda/using_job_queue/es_mda_integration_snapshot b/tests/ui_tests/cli/snapshots/test_cli/test_es_mda/using_job_queue/es_mda_integration_snapshot similarity index 100% rename from tests/integration_tests/snapshots/test_cli/test_es_mda/using_job_queue/es_mda_integration_snapshot rename to tests/ui_tests/cli/snapshots/test_cli/test_es_mda/using_job_queue/es_mda_integration_snapshot diff --git a/tests/integration_tests/snapshots/test_cli/test_es_mda/using_scheduler/es_mda_integration_snapshot b/tests/ui_tests/cli/snapshots/test_cli/test_es_mda/using_scheduler/es_mda_integration_snapshot similarity index 100% rename from tests/integration_tests/snapshots/test_cli/test_es_mda/using_scheduler/es_mda_integration_snapshot rename to tests/ui_tests/cli/snapshots/test_cli/test_es_mda/using_scheduler/es_mda_integration_snapshot diff --git a/tests/integration_tests/test_cli.py b/tests/ui_tests/cli/test_cli.py similarity index 100% rename from tests/integration_tests/test_cli.py rename to tests/ui_tests/cli/test_cli.py diff --git a/tests/integration_tests/storage/test_field_parameter.py b/tests/ui_tests/cli/test_field_parameter.py similarity index 99% rename from tests/integration_tests/storage/test_field_parameter.py rename to tests/ui_tests/cli/test_field_parameter.py index 77530fb4286..64adebe7d5a 100644 --- 
a/tests/integration_tests/storage/test_field_parameter.py +++ b/tests/ui_tests/cli/test_field_parameter.py @@ -11,7 +11,8 @@ from ert.config import ErtConfig from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE from ert.storage import open_storage -from tests.integration_tests.run_cli import run_cli + +from .run_cli import run_cli @pytest.mark.integration_test diff --git a/tests/integration_tests/test_observation_times.py b/tests/ui_tests/cli/test_observation_times.py similarity index 100% rename from tests/integration_tests/test_observation_times.py rename to tests/ui_tests/cli/test_observation_times.py diff --git a/tests/integration_tests/test_parameter_example.py b/tests/ui_tests/cli/test_parameter_example.py similarity index 100% rename from tests/integration_tests/test_parameter_example.py rename to tests/ui_tests/cli/test_parameter_example.py diff --git a/tests/integration_tests/storage/test_parameter_sample_types.py b/tests/ui_tests/cli/test_parameter_sample_types.py similarity index 99% rename from tests/integration_tests/storage/test_parameter_sample_types.py rename to tests/ui_tests/cli/test_parameter_sample_types.py index 525e98241ae..2de7cb6a247 100644 --- a/tests/integration_tests/storage/test_parameter_sample_types.py +++ b/tests/ui_tests/cli/test_parameter_sample_types.py @@ -11,7 +11,8 @@ from ert.libres_facade import LibresFacade from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE from ert.storage import open_storage -from tests.integration_tests.run_cli import run_cli + +from .run_cli import run_cli def load_from_forward_model(ert_config, ensemble): diff --git a/tests/integration_tests/shared/share/test_shell.py b/tests/ui_tests/cli/test_shell.py similarity index 97% rename from tests/integration_tests/shared/share/test_shell.py rename to tests/ui_tests/cli/test_shell.py index 03a9b8035b2..e30418ada89 100644 --- a/tests/integration_tests/shared/share/test_shell.py +++ b/tests/ui_tests/cli/test_shell.py @@ -4,7 +4,8 @@ import pytest from ert.plugins import ErtPluginManager -from tests.integration_tests.run_cli import run_cli_with_pm + +from .run_cli import run_cli_with_pm @pytest.mark.integration_test diff --git a/tests/integration_tests/storage/__init__.py b/tests/ui_tests/gui/__init__.py similarity index 100% rename from tests/integration_tests/storage/__init__.py rename to tests/ui_tests/gui/__init__.py diff --git a/tests/unit_tests/analysis/test_es_update.py b/tests/unit_tests/analysis/test_es_update.py index 299f156c2b0..7cfdf14874f 100644 --- a/tests/unit_tests/analysis/test_es_update.py +++ b/tests/unit_tests/analysis/test_es_update.py @@ -849,3 +849,229 @@ def test_that_autoscaling_applies_to_scaled_errors(storage): assert scaled_errors_with_autoscale.tolist() == [2, 6] assert scaled_errors_without_autoscale.tolist() == [1, 2] + + +def test_gen_data_obs_data_mismatch(storage, uniform_parameter): + resp = GenDataConfig(keys=["RESPONSE"]) + obs = xr.Dataset( + { + "observations": (["report_step", "index"], [[1.0]]), + "std": (["report_step", "index"], [[0.1]]), + }, + coords={"index": [1000], "report_step": [0]}, + attrs={"response": "RESPONSE"}, + ) + experiment = storage.create_experiment( + parameters=[uniform_parameter], + responses=[resp], + observations={"OBSERVATION": obs}, + ) + prior = storage.create_ensemble( + experiment, + ensemble_size=10, + iteration=0, + name="prior", + ) + rng = np.random.default_rng(1234) + for iens in range(prior.ensemble_size): + data = rng.uniform(0, 1) + prior.save_parameters( + "PARAMETER", + iens, + xr.Dataset( + { + 
"values": ("names", [data]), + "transformed_values": ("names", [data]), + "names": ["KEY_1"], + } + ), + ) + data = rng.uniform(0.8, 1, 3) + prior.save_response( + "gen_data", + xr.Dataset( + {"values": (["name", "report_step", "index"], [[data]])}, + coords={ + "name": ["RESPONSE"], + "index": range(len(data)), + "report_step": [0], + }, + ), + iens, + ) + posterior_ens = storage.create_ensemble( + prior.experiment_id, + ensemble_size=prior.ensemble_size, + iteration=1, + name="posterior", + prior_ensemble=prior, + ) + with pytest.raises( + ErtAnalysisError, + match="No active observations", + ): + smoother_update( + prior, + posterior_ens, + ["OBSERVATION"], + ["PARAMETER"], + UpdateSettings(), + ESSettings(), + ) + + +@pytest.mark.usefixtures("use_tmpdir") +def test_gen_data_missing(storage, uniform_parameter, obs): + resp = GenDataConfig(keys=["RESPONSE"]) + experiment = storage.create_experiment( + parameters=[uniform_parameter], + responses=[resp], + observations={"OBSERVATION": obs}, + ) + prior = storage.create_ensemble( + experiment, + ensemble_size=10, + iteration=0, + name="prior", + ) + rng = np.random.default_rng(1234) + for iens in range(prior.ensemble_size): + data = rng.uniform(0, 1) + prior.save_parameters( + "PARAMETER", + iens, + xr.Dataset( + { + "values": ("names", [data]), + "transformed_values": ("names", [data]), + "names": ["KEY_1"], + } + ), + ) + data = rng.uniform(0.8, 1, 2) # Importantly, shorter than obs + prior.save_response( + "gen_data", + xr.Dataset( + {"values": (["name", "report_step", "index"], [[data]])}, + coords={ + "name": ["RESPONSE"], + "index": range(len(data)), + "report_step": [0], + }, + ), + iens, + ) + posterior_ens = storage.create_ensemble( + prior.experiment_id, + ensemble_size=prior.ensemble_size, + iteration=1, + name="posterior", + prior_ensemble=prior, + ) + events = [] + + update_snapshot = smoother_update( + prior, + posterior_ens, + ["OBSERVATION"], + ["PARAMETER"], + UpdateSettings(), + ESSettings(), + progress_callback=events.append, + ) + assert [step.status for step in update_snapshot.update_step_snapshots] == [ + ObservationStatus.ACTIVE, + ObservationStatus.ACTIVE, + ObservationStatus.MISSING_RESPONSE, + ] + + update_event = next(e for e in events if isinstance(e, AnalysisCompleteEvent)) + data_section = update_event.data + assert data_section.extra["Active observations"] == "2" + assert data_section.extra["Deactivated observations - missing respons(es)"] == "1" + + +@pytest.mark.usefixtures("use_tmpdir") +def test_update_subset_parameters(storage, uniform_parameter, obs): + no_update_param = GenKwConfig( + name="EXTRA_PARAMETER", + forward_init=False, + template_file="", + transform_function_definitions=[ + TransformFunctionDefinition("KEY1", "UNIFORM", [0, 1]), + ], + output_file=None, + update=False, + ) + resp = GenDataConfig(keys=["RESPONSE"]) + experiment = storage.create_experiment( + parameters=[uniform_parameter, no_update_param], + responses=[resp], + observations={"OBSERVATION": obs}, + ) + prior = storage.create_ensemble( + experiment, + ensemble_size=10, + iteration=0, + name="prior", + ) + rng = np.random.default_rng(1234) + for iens in range(prior.ensemble_size): + data = rng.uniform(0, 1) + prior.save_parameters( + "PARAMETER", + iens, + xr.Dataset( + { + "values": ("names", [data]), + "transformed_values": ("names", [data]), + "names": ["KEY_1"], + } + ), + ) + prior.save_parameters( + "EXTRA_PARAMETER", + iens, + xr.Dataset( + { + "values": ("names", [data]), + "transformed_values": ("names", [data]), + 
"names": ["KEY_1"], + } + ), + ) + + data = rng.uniform(0.8, 1, 10) + prior.save_response( + "gen_data", + xr.Dataset( + {"values": (["name", "report_step", "index"], [[data]])}, + coords={ + "name": ["RESPONSE"], + "index": range(len(data)), + "report_step": [0], + }, + ), + iens, + ) + posterior_ens = storage.create_ensemble( + prior.experiment_id, + ensemble_size=prior.ensemble_size, + iteration=1, + name="posterior", + prior_ensemble=prior, + ) + smoother_update( + prior, + posterior_ens, + ["OBSERVATION"], + ["PARAMETER"], + UpdateSettings(), + ESSettings(), + ) + assert prior.load_parameters("EXTRA_PARAMETER", 0)["values"].equals( + posterior_ens.load_parameters("EXTRA_PARAMETER", 0)["values"] + ) + assert not prior.load_parameters("PARAMETER", 0)["values"].equals( + posterior_ens.load_parameters("PARAMETER", 0)["values"] + ) diff --git a/tests/unit_tests/gui/tools/test_manage_experiments_tool.py b/tests/unit_tests/gui/tools/test_manage_experiments_tool.py index cad22e6cf8a..7babb225c51 100644 --- a/tests/unit_tests/gui/tools/test_manage_experiments_tool.py +++ b/tests/unit_tests/gui/tools/test_manage_experiments_tool.py @@ -16,7 +16,7 @@ from ert.gui.tools.manage_experiments.storage_widget import StorageWidget from ert.storage import Storage, open_storage from ert.storage.realization_storage_state import RealizationStorageState -from tests.integration_tests.analysis.test_adaptive_localization import ( +from tests.ui_tests.cli.analysis.test_adaptive_localization import ( run_cli_ES_with_case, ) From 599906c2a19f910de223002857b1d898ed9b97e6 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 10:10:30 +0200 Subject: [PATCH 02/11] Move interaction focused gui tests to ui_tests --- .../share/__init__.py | 0 .../share/_import_from_location.py | 0 .../share/ecl_run_fail | 0 .../share/test_ecl_run_new_config.py | 0 .../share/test_ecl_versioning_config.py | 0 .../share/test_forward_models.py | 0 .../share/test_opm_flow.py | 0 .../share/test_shell.py | 0 .../share/test_subprocess.py | 0 .../share/test_templating.py | 0 tests/performance_tests/test_snapshot.py | 7 +- .../test_histogram_log_scale-empty.png | Bin .../test_histogram_log_scale-float.png | Bin .../test_histogram_no_ensembles-empty.png | Bin ...est_histogram_no_log_scale-categorical.png | Bin .../test_histogram_no_log_scale-empty.png | Bin .../test_histogram_no_log_scale-float.png | Bin ...ations_matches_snapshot_plot_figure0-0.png | Bin ...isations_matches_snapshot_plot_figure0.png | Bin ...ations_matches_snapshot_plot_figure1-0.png | Bin ...isations_matches_snapshot_plot_figure1.png | Bin ...ations_matches_snapshot_plot_figure2-0.png | Bin ...isations_matches_snapshot_plot_figure2.png | Bin ...ations_matches_snapshot_plot_figure3-0.png | Bin ...isations_matches_snapshot_plot_figure3.png | Bin ...ations_matches_snapshot_plot_figure4-0.png | Bin ...isations_matches_snapshot_plot_figure4.png | Bin ...ations_matches_snapshot_plot_figure5-0.png | Bin ...isations_matches_snapshot_plot_figure5.png | Bin ...ations_matches_snapshot_plot_figure6-0.png | Bin ...isations_matches_snapshot_plot_figure6.png | Bin tests/ui_tests/gui/conftest.py | 427 +++++++++++++++++ .../gui/test_csv_export.py | 0 .../gui/test_full_manual_update_workflow.py | 0 .../gui}/test_load_results_manually.py | 2 +- .../gui/test_main_window.py | 0 .../gui}/test_plotting_of_snake_oil.py | 3 +- .../gui/test_restart_ensemble_experiment.py | 0 .../gui/test_restart_esmda.py | 0 ...est_restart_no_responses_and_parameters.py | 0 .../gui/test_rft_export_plugin.py 
| 0 .../gui/test_single_test_run.py | 0 .../gui}/test_workflow_tool.py | 2 +- tests/unit_tests/gui/conftest.py | 446 +----------------- .../test_plot_case_selection_widget.py | 3 +- .../test_evaluate_ensemble_panel.py | 2 +- .../gui/simulation/test_run_dialog.py | 3 +- .../gui/simulation/view/test_legend.py | 2 +- .../gui/simulation/view/test_realization.py | 31 ++ 49 files changed, 471 insertions(+), 457 deletions(-) rename tests/{unit_tests/shared => integration_tests}/share/__init__.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/_import_from_location.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/ecl_run_fail (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_ecl_run_new_config.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_ecl_versioning_config.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_forward_models.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_opm_flow.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_shell.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_subprocess.py (100%) rename tests/{unit_tests/shared => integration_tests}/share/test_templating.py (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_histogram_log_scale-empty.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_histogram_log_scale-float.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_histogram_no_ensembles-empty.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_histogram_no_log_scale-categorical.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_histogram_no_log_scale-empty.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_histogram_no_log_scale-float.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0-0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1-0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2-0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3-0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4-0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5-0.png (100%) rename 
tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6-0.png (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6.png (100%) create mode 100644 tests/ui_tests/gui/conftest.py rename tests/{unit_tests => ui_tests}/gui/test_csv_export.py (100%) rename tests/{unit_tests => ui_tests}/gui/test_full_manual_update_workflow.py (100%) rename tests/{unit_tests/gui/tools => ui_tests/gui}/test_load_results_manually.py (98%) rename tests/{unit_tests => ui_tests}/gui/test_main_window.py (100%) rename tests/{unit_tests/gui/plottery => ui_tests/gui}/test_plotting_of_snake_oil.py (99%) rename tests/{unit_tests => ui_tests}/gui/test_restart_ensemble_experiment.py (100%) rename tests/{unit_tests => ui_tests}/gui/test_restart_esmda.py (100%) rename tests/{unit_tests => ui_tests}/gui/test_restart_no_responses_and_parameters.py (100%) rename tests/{unit_tests => ui_tests}/gui/test_rft_export_plugin.py (100%) rename tests/{unit_tests => ui_tests}/gui/test_single_test_run.py (100%) rename tests/{unit_tests/gui/tools => ui_tests/gui}/test_workflow_tool.py (98%) diff --git a/tests/unit_tests/shared/share/__init__.py b/tests/integration_tests/share/__init__.py similarity index 100% rename from tests/unit_tests/shared/share/__init__.py rename to tests/integration_tests/share/__init__.py diff --git a/tests/unit_tests/shared/share/_import_from_location.py b/tests/integration_tests/share/_import_from_location.py similarity index 100% rename from tests/unit_tests/shared/share/_import_from_location.py rename to tests/integration_tests/share/_import_from_location.py diff --git a/tests/unit_tests/shared/share/ecl_run_fail b/tests/integration_tests/share/ecl_run_fail similarity index 100% rename from tests/unit_tests/shared/share/ecl_run_fail rename to tests/integration_tests/share/ecl_run_fail diff --git a/tests/unit_tests/shared/share/test_ecl_run_new_config.py b/tests/integration_tests/share/test_ecl_run_new_config.py similarity index 100% rename from tests/unit_tests/shared/share/test_ecl_run_new_config.py rename to tests/integration_tests/share/test_ecl_run_new_config.py diff --git a/tests/unit_tests/shared/share/test_ecl_versioning_config.py b/tests/integration_tests/share/test_ecl_versioning_config.py similarity index 100% rename from tests/unit_tests/shared/share/test_ecl_versioning_config.py rename to tests/integration_tests/share/test_ecl_versioning_config.py diff --git a/tests/unit_tests/shared/share/test_forward_models.py b/tests/integration_tests/share/test_forward_models.py similarity index 100% rename from tests/unit_tests/shared/share/test_forward_models.py rename to tests/integration_tests/share/test_forward_models.py diff --git a/tests/unit_tests/shared/share/test_opm_flow.py b/tests/integration_tests/share/test_opm_flow.py similarity index 100% rename from tests/unit_tests/shared/share/test_opm_flow.py rename to tests/integration_tests/share/test_opm_flow.py diff --git a/tests/unit_tests/shared/share/test_shell.py b/tests/integration_tests/share/test_shell.py similarity index 100% rename from tests/unit_tests/shared/share/test_shell.py rename to tests/integration_tests/share/test_shell.py diff --git a/tests/unit_tests/shared/share/test_subprocess.py b/tests/integration_tests/share/test_subprocess.py similarity 
index 100% rename from tests/unit_tests/shared/share/test_subprocess.py rename to tests/integration_tests/share/test_subprocess.py diff --git a/tests/unit_tests/shared/share/test_templating.py b/tests/integration_tests/share/test_templating.py similarity index 100% rename from tests/unit_tests/shared/share/test_templating.py rename to tests/integration_tests/share/test_templating.py diff --git a/tests/performance_tests/test_snapshot.py b/tests/performance_tests/test_snapshot.py index 7bd0f7620e1..39139bb4104 100644 --- a/tests/performance_tests/test_snapshot.py +++ b/tests/performance_tests/test_snapshot.py @@ -14,12 +14,13 @@ FMStepSnapshot, RealizationSnapshot, ) - -from ..unit_tests.gui.conftest import ( # noqa: F401 +from tests.ui_tests.gui.conftest import ( # noqa: F401 active_realizations_fixture, +) +from tests.unit_tests.gui.conftest import ( # noqa: F401 large_snapshot, ) -from ..unit_tests.gui.simulation.test_run_dialog import ( # noqa: F401 +from tests.unit_tests.gui.simulation.test_run_dialog import ( # noqa: F401 event_queue, notifier, run_dialog, diff --git a/tests/unit_tests/gui/plottery/baseline/test_histogram_log_scale-empty.png b/tests/ui_tests/gui/baseline/test_histogram_log_scale-empty.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_histogram_log_scale-empty.png rename to tests/ui_tests/gui/baseline/test_histogram_log_scale-empty.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_histogram_log_scale-float.png b/tests/ui_tests/gui/baseline/test_histogram_log_scale-float.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_histogram_log_scale-float.png rename to tests/ui_tests/gui/baseline/test_histogram_log_scale-float.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_histogram_no_ensembles-empty.png b/tests/ui_tests/gui/baseline/test_histogram_no_ensembles-empty.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_histogram_no_ensembles-empty.png rename to tests/ui_tests/gui/baseline/test_histogram_no_ensembles-empty.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_histogram_no_log_scale-categorical.png b/tests/ui_tests/gui/baseline/test_histogram_no_log_scale-categorical.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_histogram_no_log_scale-categorical.png rename to tests/ui_tests/gui/baseline/test_histogram_no_log_scale-categorical.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_histogram_no_log_scale-empty.png b/tests/ui_tests/gui/baseline/test_histogram_no_log_scale-empty.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_histogram_no_log_scale-empty.png rename to tests/ui_tests/gui/baseline/test_histogram_no_log_scale-empty.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_histogram_no_log_scale-float.png b/tests/ui_tests/gui/baseline/test_histogram_no_log_scale-float.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_histogram_no_log_scale-float.png rename to tests/ui_tests/gui/baseline/test_histogram_no_log_scale-float.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0-0.png rename to 
tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1-0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure1.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2-0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure2.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3-0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure3.png diff --git 
a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4-0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure4.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5-0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure5.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6-0.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6-0.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6-0.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6-0.png diff --git a/tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6.png b/tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6.png similarity index 100% rename from tests/unit_tests/gui/plottery/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6.png rename to tests/ui_tests/gui/baseline/test_that_all_snake_oil_visualisations_matches_snapshot_plot_figure6.png diff --git a/tests/ui_tests/gui/conftest.py b/tests/ui_tests/gui/conftest.py new file mode 100644 index 00000000000..0dd240dc8ae --- /dev/null +++ b/tests/ui_tests/gui/conftest.py @@ -0,0 +1,427 @@ +import contextlib +import fileinput +import os +import os.path +import shutil +import stat +import time +from contextlib import contextmanager +from pathlib import Path +from textwrap import dedent +from typing import Generator, List, Tuple, Type, TypeVar +from unittest.mock import MagicMock, Mock + +import pytest +from pytestqt.qtbot import QtBot +from qtpy.QtCore import Qt, QTimer +from qtpy.QtWidgets import QApplication, 
QComboBox, QMessageBox, QPushButton, QWidget
+
+from ert.config import ErtConfig
+from ert.gui.ertwidgets import ClosableDialog
+from ert.gui.ertwidgets.create_experiment_dialog import CreateExperimentDialog
+from ert.gui.ertwidgets.ensembleselector import EnsembleSelector
+from ert.gui.main import ErtMainWindow, _setup_main_window, add_gui_log_handler
+from ert.gui.simulation.experiment_panel import ExperimentPanel
+from ert.gui.simulation.run_dialog import RunDialog
+from ert.gui.simulation.view import RealizationWidget
+from ert.gui.tools.load_results.load_results_panel import LoadResultsPanel
+from ert.gui.tools.manage_experiments.manage_experiments_tool import (
+    ManageExperimentsTool,
+)
+from ert.gui.tools.manage_experiments.storage_widget import AddWidget, StorageWidget
+from ert.plugins import ErtPluginContext
+from ert.run_models import EnsembleExperiment, MultipleDataAssimilation
+from ert.services import StorageService
+from ert.storage import Storage, open_storage
+from tests.unit_tests.gui.simulation.test_run_path_dialog import handle_run_path_dialog
+
+
+@pytest.fixture
+def opened_main_window(
+    source_root, tmp_path, monkeypatch
+) -> Generator[ErtMainWindow, None, None]:
+    monkeypatch.chdir(tmp_path)
+    _new_poly_example(source_root, tmp_path)
+    with _open_main_window(tmp_path / "poly.ert") as (
+        gui,
+        _,
+        config,
+    ), StorageService.init_service(
+        project=os.path.abspath(config.ens_path),
+    ):
+        yield gui
+
+
+def _new_poly_example(source_root, destination, num_realizations: int = 20):
+    shutil.copytree(
+        os.path.join(source_root, "test-data", "poly_example"),
+        destination,
+        dirs_exist_ok=True,
+    )
+
+    with fileinput.input(destination / "poly.ert", inplace=True) as fin:
+        for line in fin:
+            if "NUM_REALIZATIONS" in line:
+                # Decrease the number of realizations to speed up the test,
+                # if there is flakiness, this can be increased.
+ print(f"NUM_REALIZATIONS {num_realizations}", end="\n") + else: + print(line, end="") + + +@contextmanager +def _open_main_window( + path, +) -> Generator[Tuple[ErtMainWindow, Storage, ErtConfig], None, None]: + args_mock = Mock() + args_mock.config = str(path) + with ErtPluginContext(): + config = ErtConfig.with_plugins().from_file(path) + with open_storage( + config.ens_path, mode="w" + ) as storage, add_gui_log_handler() as log_handler: + gui = _setup_main_window(config, args_mock, log_handler, storage) + yield gui, storage, config + gui.close() + + +@pytest.fixture +def opened_main_window_clean(source_root, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _new_poly_example(source_root, tmp_path) + with _open_main_window(tmp_path / "poly.ert") as ( + gui, + _, + config, + ), StorageService.init_service( + project=os.path.abspath(config.ens_path), + ): + yield gui + + +@pytest.fixture +def opened_main_window_minimal_realizations(source_root, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _new_poly_example(source_root, tmp_path, 2) + with _open_main_window(tmp_path / "poly.ert") as ( + gui, + _, + config, + ), StorageService.init_service( + project=os.path.abspath(config.ens_path), + ): + yield gui + + +@pytest.fixture +def opened_main_window_snake_oil(snake_oil_case_storage): + with _open_main_window(Path("./snake_oil.ert")) as ( + gui, + _, + config, + ), StorageService.init_service( + project=os.path.abspath(config.ens_path), + ): + yield gui + + +@pytest.fixture(scope="module") +def _esmda_run(run_experiment, source_root, tmp_path_factory): + path = tmp_path_factory.mktemp("test-data") + _new_poly_example(source_root, path) + with pytest.MonkeyPatch.context() as mp, _open_main_window(path / "poly.ert") as ( + gui, + _, + config, + ): + mp.chdir(path) + run_experiment(MultipleDataAssimilation, gui) + # Check that we produce update log + log_paths = list(Path(config.analysis_config.log_path).iterdir()) + assert log_paths + assert (log_paths[0] / "Report.report").exists() + assert (log_paths[0] / "Report.csv").exists() + + return path + + +def _ensemble_experiment_run( + run_experiment, source_root, tmp_path_factory, failing_reals +): + path = tmp_path_factory.mktemp("test-data") + _new_poly_example(source_root, path) + with pytest.MonkeyPatch.context() as mp, _open_main_window(path / "poly.ert") as ( + gui, + _, + _, + ): + mp.chdir(path) + if failing_reals: + with open("poly_eval.py", "w", encoding="utf-8") as f: + f.write( + dedent( + """\ + #!/usr/bin/env python3 + import os + import sys + import json + + def _load_coeffs(filename): + with open(filename, encoding="utf-8") as f: + return json.load(f)["COEFFS"] + + def _evaluate(coeffs, x): + return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"] + + if __name__ == "__main__": + if int(os.getenv("_ERT_REALIZATION_NUMBER")) % 2 == 0: + sys.exit(1) + coeffs = _load_coeffs("parameters.json") + output = [_evaluate(coeffs, x) for x in range(10)] + with open("poly.out", "w", encoding="utf-8") as f: + f.write("\\n".join(map(str, output))) + """ + ) + ) + os.chmod( + "poly_eval.py", + os.stat("poly_eval.py").st_mode + | stat.S_IXUSR + | stat.S_IXGRP + | stat.S_IXOTH, + ) + run_experiment(EnsembleExperiment, gui) + + return path + + +@pytest.fixture +def esmda_has_run(_esmda_run, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + shutil.copytree(_esmda_run, tmp_path, dirs_exist_ok=True) + with _open_main_window(tmp_path / "poly.ert") as ( + gui, + _, + config, + ), StorageService.init_service( + 
project=os.path.abspath(config.ens_path), + ): + yield gui + + +@pytest.fixture +def ensemble_experiment_has_run( + tmp_path, monkeypatch, run_experiment, source_root, tmp_path_factory +): + monkeypatch.chdir(tmp_path) + test_files = _ensemble_experiment_run( + run_experiment, source_root, tmp_path_factory, True + ) + shutil.copytree(test_files, tmp_path, dirs_exist_ok=True) + with _open_main_window(tmp_path / "poly.ert") as ( + gui, + _, + config, + ), StorageService.init_service( + project=os.path.abspath(config.ens_path), + ): + yield gui + + +@pytest.fixture +def ensemble_experiment_has_run_no_failure( + tmp_path, monkeypatch, run_experiment, source_root, tmp_path_factory +): + monkeypatch.chdir(tmp_path) + test_files = _ensemble_experiment_run( + run_experiment, source_root, tmp_path_factory, False + ) + shutil.copytree(test_files, tmp_path, dirs_exist_ok=True) + with _open_main_window(tmp_path / "poly.ert") as ( + gui, + _, + config, + ), StorageService.init_service( + project=os.path.abspath(config.ens_path), + ): + yield gui + + +@pytest.fixture(name="run_experiment", scope="module") +def run_experiment_fixture(request): + def func(experiment_mode, gui): + qtbot = QtBot(request) + with contextlib.suppress(FileNotFoundError): + shutil.rmtree("poly_out") + # Select correct experiment in the simulation panel + experiment_panel = gui.findChild(ExperimentPanel) + assert isinstance(experiment_panel, ExperimentPanel) + simulation_mode_combo = experiment_panel.findChild(QComboBox) + assert isinstance(simulation_mode_combo, QComboBox) + simulation_mode_combo.setCurrentText(experiment_mode.name()) + simulation_settings = experiment_panel._experiment_widgets[ + experiment_panel.get_current_experiment_type() + ] + if hasattr(simulation_settings, "_ensemble_name_field"): + simulation_settings._ensemble_name_field.setText("iter-0") + + # Click start simulation and agree to the message + run_experiment = experiment_panel.findChild(QWidget, name="run_experiment") + + def handle_dialog(): + QTimer.singleShot( + 500, + lambda: handle_run_path_dialog(gui, qtbot, delete_run_path=False), + ) + + if not experiment_mode.name() in ( + "Ensemble experiment", + "Evaluate ensemble", + ): + QTimer.singleShot(500, handle_dialog) + qtbot.mouseClick(run_experiment, Qt.LeftButton) + + # The Run dialog opens, click show details and wait until done appears + # then click it + qtbot.waitUntil(lambda: gui.findChild(RunDialog) is not None) + run_dialog = gui.findChild(RunDialog) + + qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=200000) + qtbot.waitUntil(lambda: run_dialog._tab_widget.currentWidget() is not None) + + # Assert that the number of boxes in the detailed view is + # equal to the number of realizations + realization_widget = run_dialog._tab_widget.currentWidget() + assert isinstance(realization_widget, RealizationWidget) + list_model = realization_widget._real_view.model() + assert ( + list_model.rowCount() + == experiment_panel.config.model_config.num_realizations + ) + + qtbot.mouseClick(run_dialog.done_button, Qt.LeftButton) + + return func + + +@pytest.fixture(name="active_realizations") +def active_realizations_fixture() -> Mock: + active_reals = MagicMock() + active_reals.count = Mock(return_value=10) + active_reals.__iter__.return_value = [True] * 10 + return active_reals + + +@pytest.fixture +def runmodel(active_realizations) -> Mock: + brm = Mock() + brm.get_runtime = Mock(return_value=100) + brm.format_error = Mock(return_value="") + brm.support_restart = True + brm.simulation_arguments 
= {"active_realizations": active_realizations} + brm.has_failed_realizations = lambda: False + return brm + + +class MockTracker: + def __init__(self, events) -> None: + self._events = events + self._is_running = True + + def track(self): + for event in self._events: + if not self._is_running: + break + time.sleep(0.1) + yield event + + def reset(self): + pass + + def request_termination(self): + self._is_running = False + + +@pytest.fixture +def mock_tracker(): + def _make_mock_tracker(events): + return MockTracker(events) + + return _make_mock_tracker + + +def load_results_manually(qtbot, gui, ensemble_name="default"): + def handle_load_results_dialog(): + dialog = wait_for_child(gui, qtbot, ClosableDialog) + panel = get_child(dialog, LoadResultsPanel) + + ensemble_selector = get_child(panel, EnsembleSelector) + index = ensemble_selector.findText(ensemble_name, Qt.MatchFlag.MatchContains) + assert index != -1 + ensemble_selector.setCurrentIndex(index) + + # click on "Load" + load_button = get_child(panel.parent(), QPushButton, name="Load") + + # Verify that the messagebox is the success kind + def handle_popup_dialog(): + messagebox = QApplication.activeModalWidget() + assert isinstance(messagebox, QMessageBox) + assert messagebox.text() == "Successfully loaded all realisations" + ok_button = messagebox.button(QMessageBox.Ok) + qtbot.mouseClick(ok_button, Qt.LeftButton) + + QTimer.singleShot(2000, handle_popup_dialog) + qtbot.mouseClick(load_button, Qt.LeftButton) + dialog.close() + + QTimer.singleShot(1000, handle_load_results_dialog) + load_results_tool = gui.tools["Load results manually"] + load_results_tool.trigger() + + +def add_experiment_manually( + qtbot, gui, experiment_name="My_experiment", ensemble_name="default" +): + manage_tool = gui.tools["Manage experiments"] + manage_tool.trigger() + + assert isinstance(manage_tool, ManageExperimentsTool) + experiments_panel = manage_tool._manage_experiments_panel + + # Open the create new experiment tab + experiments_panel.setCurrentIndex(0) + current_tab = experiments_panel.currentWidget() + assert current_tab.objectName() == "create_new_ensemble_tab" + storage_widget = get_child(current_tab, StorageWidget) + + def handle_add_dialog(): + dialog = wait_for_child(current_tab, qtbot, CreateExperimentDialog) + dialog._experiment_edit.setText(experiment_name) + dialog._ensemble_edit.setText(ensemble_name) + qtbot.mouseClick(dialog._ok_button, Qt.MouseButton.LeftButton) + + QTimer.singleShot(1000, handle_add_dialog) + add_widget = get_child(storage_widget, AddWidget) + qtbot.mouseClick(add_widget.addButton, Qt.MouseButton.LeftButton) + + experiments_panel.close() + + +V = TypeVar("V") + + +def wait_for_child(gui, qtbot: QtBot, typ: Type[V], *args, **kwargs) -> V: + qtbot.waitUntil(lambda: gui.findChild(typ, *args, **kwargs) is not None) + return get_child(gui, typ, *args, **kwargs) + + +def get_child(gui: QWidget, typ: Type[V], *args, **kwargs) -> V: + child = gui.findChild(typ, *args, **kwargs) + assert isinstance(child, typ) + return child + + +def get_children(gui: QWidget, typ: Type[V], *args, **kwargs) -> List[V]: + children: List[typ] = gui.findChildren(typ, *args, **kwargs) + return children diff --git a/tests/unit_tests/gui/test_csv_export.py b/tests/ui_tests/gui/test_csv_export.py similarity index 100% rename from tests/unit_tests/gui/test_csv_export.py rename to tests/ui_tests/gui/test_csv_export.py diff --git a/tests/unit_tests/gui/test_full_manual_update_workflow.py b/tests/ui_tests/gui/test_full_manual_update_workflow.py 
similarity index 100% rename from tests/unit_tests/gui/test_full_manual_update_workflow.py rename to tests/ui_tests/gui/test_full_manual_update_workflow.py diff --git a/tests/unit_tests/gui/tools/test_load_results_manually.py b/tests/ui_tests/gui/test_load_results_manually.py similarity index 98% rename from tests/unit_tests/gui/tools/test_load_results_manually.py rename to tests/ui_tests/gui/test_load_results_manually.py index 6c67daf5d62..6dcc06cd688 100644 --- a/tests/unit_tests/gui/tools/test_load_results_manually.py +++ b/tests/ui_tests/gui/test_load_results_manually.py @@ -5,7 +5,7 @@ from ert.gui.ertwidgets.ensembleselector import EnsembleSelector from ert.gui.tools.load_results import LoadResultsPanel -from ..conftest import ( +from .conftest import ( get_child, wait_for_child, ) diff --git a/tests/unit_tests/gui/test_main_window.py b/tests/ui_tests/gui/test_main_window.py similarity index 100% rename from tests/unit_tests/gui/test_main_window.py rename to tests/ui_tests/gui/test_main_window.py diff --git a/tests/unit_tests/gui/plottery/test_plotting_of_snake_oil.py b/tests/ui_tests/gui/test_plotting_of_snake_oil.py similarity index 99% rename from tests/unit_tests/gui/plottery/test_plotting_of_snake_oil.py rename to tests/ui_tests/gui/test_plotting_of_snake_oil.py index 02c554afa90..d6cf08dcd60 100644 --- a/tests/unit_tests/gui/plottery/test_plotting_of_snake_oil.py +++ b/tests/ui_tests/gui/test_plotting_of_snake_oil.py @@ -22,7 +22,8 @@ ) from ert.services import StorageService from ert.storage import open_storage -from tests.unit_tests.gui.conftest import ( + +from .conftest import ( get_child, wait_for_child, ) diff --git a/tests/unit_tests/gui/test_restart_ensemble_experiment.py b/tests/ui_tests/gui/test_restart_ensemble_experiment.py similarity index 100% rename from tests/unit_tests/gui/test_restart_ensemble_experiment.py rename to tests/ui_tests/gui/test_restart_ensemble_experiment.py diff --git a/tests/unit_tests/gui/test_restart_esmda.py b/tests/ui_tests/gui/test_restart_esmda.py similarity index 100% rename from tests/unit_tests/gui/test_restart_esmda.py rename to tests/ui_tests/gui/test_restart_esmda.py diff --git a/tests/unit_tests/gui/test_restart_no_responses_and_parameters.py b/tests/ui_tests/gui/test_restart_no_responses_and_parameters.py similarity index 100% rename from tests/unit_tests/gui/test_restart_no_responses_and_parameters.py rename to tests/ui_tests/gui/test_restart_no_responses_and_parameters.py diff --git a/tests/unit_tests/gui/test_rft_export_plugin.py b/tests/ui_tests/gui/test_rft_export_plugin.py similarity index 100% rename from tests/unit_tests/gui/test_rft_export_plugin.py rename to tests/ui_tests/gui/test_rft_export_plugin.py diff --git a/tests/unit_tests/gui/test_single_test_run.py b/tests/ui_tests/gui/test_single_test_run.py similarity index 100% rename from tests/unit_tests/gui/test_single_test_run.py rename to tests/ui_tests/gui/test_single_test_run.py diff --git a/tests/unit_tests/gui/tools/test_workflow_tool.py b/tests/ui_tests/gui/test_workflow_tool.py similarity index 98% rename from tests/unit_tests/gui/tools/test_workflow_tool.py rename to tests/ui_tests/gui/test_workflow_tool.py index f2a794b00ac..0946e2949cb 100644 --- a/tests/unit_tests/gui/tools/test_workflow_tool.py +++ b/tests/ui_tests/gui/test_workflow_tool.py @@ -18,7 +18,7 @@ from ert.services import StorageService from ert.storage import Storage, open_storage -from ..conftest import get_child, wait_for_child +from .conftest import get_child, wait_for_child @contextmanager 
diff --git a/tests/unit_tests/gui/conftest.py b/tests/unit_tests/gui/conftest.py index c8614827bb1..19fa1babddf 100644 --- a/tests/unit_tests/gui/conftest.py +++ b/tests/unit_tests/gui/conftest.py @@ -1,24 +1,8 @@ -import contextlib import copy -import fileinput -import os -import os.path -import shutil -import stat -import time -from contextlib import contextmanager from datetime import datetime as dt -from pathlib import Path -from textwrap import dedent -from typing import Generator, List, Tuple, Type, TypeVar -from unittest.mock import MagicMock, Mock import pytest -from pytestqt.qtbot import QtBot -from qtpy.QtCore import Qt, QTimer -from qtpy.QtWidgets import QApplication, QComboBox, QMessageBox, QPushButton, QWidget -from ert.config import ErtConfig from ert.ensemble_evaluator.snapshot import ( EnsembleSnapshot, FMStepSnapshot, @@ -32,295 +16,10 @@ REALIZATION_STATE_RUNNING, REALIZATION_STATE_UNKNOWN, ) -from ert.gui.ertwidgets import ClosableDialog -from ert.gui.ertwidgets.create_experiment_dialog import CreateExperimentDialog -from ert.gui.ertwidgets.ensembleselector import EnsembleSelector -from ert.gui.main import ErtMainWindow, _setup_main_window, add_gui_log_handler -from ert.gui.simulation.experiment_panel import ExperimentPanel -from ert.gui.simulation.run_dialog import RunDialog -from ert.gui.simulation.view import RealizationWidget -from ert.gui.tools.load_results.load_results_panel import LoadResultsPanel -from ert.gui.tools.manage_experiments.manage_experiments_tool import ( - ManageExperimentsTool, -) -from ert.gui.tools.manage_experiments.storage_widget import AddWidget, StorageWidget -from ert.plugins import ErtPluginContext -from ert.run_models import EnsembleExperiment, MultipleDataAssimilation -from ert.services import StorageService -from ert.storage import Storage, open_storage from tests import SnapshotBuilder -from tests.unit_tests.gui.simulation.test_run_path_dialog import handle_run_path_dialog - - -@pytest.fixture -def opened_main_window( - source_root, tmp_path, monkeypatch -) -> Generator[ErtMainWindow, None, None]: - monkeypatch.chdir(tmp_path) - _new_poly_example(source_root, tmp_path) - with _open_main_window(tmp_path / "poly.ert") as ( - gui, - _, - config, - ), StorageService.init_service( - project=os.path.abspath(config.ens_path), - ): - yield gui - - -def _new_poly_example(source_root, destination, num_realizations: int = 20): - shutil.copytree( - os.path.join(source_root, "test-data", "poly_example"), - destination, - dirs_exist_ok=True, - ) - - with fileinput.input(destination / "poly.ert", inplace=True) as fin: - for line in fin: - if "NUM_REALIZATIONS" in line: - # Decrease the number of realizations to speed up the test, - # if there is flakyness, this can be increased. 
- print(f"NUM_REALIZATIONS {num_realizations}", end="\n") - else: - print(line, end="") - - -@contextmanager -def _open_main_window( - path, -) -> Generator[Tuple[ErtMainWindow, Storage, ErtConfig], None, None]: - args_mock = Mock() - args_mock.config = str(path) - with ErtPluginContext(): - config = ErtConfig.with_plugins().from_file(path) - with open_storage( - config.ens_path, mode="w" - ) as storage, add_gui_log_handler() as log_handler: - gui = _setup_main_window(config, args_mock, log_handler, storage) - yield gui, storage, config - gui.close() - - -@pytest.fixture -def opened_main_window_clean(source_root, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - _new_poly_example(source_root, tmp_path) - with _open_main_window(tmp_path / "poly.ert") as ( - gui, - _, - config, - ), StorageService.init_service( - project=os.path.abspath(config.ens_path), - ): - yield gui - - -@pytest.fixture -def opened_main_window_minimal_realizations(source_root, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - _new_poly_example(source_root, tmp_path, 2) - with _open_main_window(tmp_path / "poly.ert") as ( - gui, - _, - config, - ), StorageService.init_service( - project=os.path.abspath(config.ens_path), - ): - yield gui - - -@pytest.fixture -def opened_main_window_snake_oil(snake_oil_case_storage): - with _open_main_window(Path("./snake_oil.ert")) as ( - gui, - _, - config, - ), StorageService.init_service( - project=os.path.abspath(config.ens_path), - ): - yield gui - - -@pytest.fixture(scope="module") -def _esmda_run(run_experiment, source_root, tmp_path_factory): - path = tmp_path_factory.mktemp("test-data") - _new_poly_example(source_root, path) - with pytest.MonkeyPatch.context() as mp, _open_main_window(path / "poly.ert") as ( - gui, - _, - config, - ): - mp.chdir(path) - run_experiment(MultipleDataAssimilation, gui) - # Check that we produce update log - log_paths = list(Path(config.analysis_config.log_path).iterdir()) - assert log_paths - assert (log_paths[0] / "Report.report").exists() - assert (log_paths[0] / "Report.csv").exists() - - return path - - -def _ensemble_experiment_run( - run_experiment, source_root, tmp_path_factory, failing_reals -): - path = tmp_path_factory.mktemp("test-data") - _new_poly_example(source_root, path) - with pytest.MonkeyPatch.context() as mp, _open_main_window(path / "poly.ert") as ( - gui, - _, - _, - ): - mp.chdir(path) - if failing_reals: - with open("poly_eval.py", "w", encoding="utf-8") as f: - f.write( - dedent( - """\ - #!/usr/bin/env python3 - import os - import sys - import json - - def _load_coeffs(filename): - with open(filename, encoding="utf-8") as f: - return json.load(f)["COEFFS"] - - def _evaluate(coeffs, x): - return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"] - - if __name__ == "__main__": - if int(os.getenv("_ERT_REALIZATION_NUMBER")) % 2 == 0: - sys.exit(1) - coeffs = _load_coeffs("parameters.json") - output = [_evaluate(coeffs, x) for x in range(10)] - with open("poly.out", "w", encoding="utf-8") as f: - f.write("\\n".join(map(str, output))) - """ - ) - ) - os.chmod( - "poly_eval.py", - os.stat("poly_eval.py").st_mode - | stat.S_IXUSR - | stat.S_IXGRP - | stat.S_IXOTH, - ) - run_experiment(EnsembleExperiment, gui) - - return path - - -@pytest.fixture -def esmda_has_run(_esmda_run, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - shutil.copytree(_esmda_run, tmp_path, dirs_exist_ok=True) - with _open_main_window(tmp_path / "poly.ert") as ( - gui, - _, - config, - ), StorageService.init_service( - 
project=os.path.abspath(config.ens_path), - ): - yield gui - - -@pytest.fixture -def ensemble_experiment_has_run( - tmp_path, monkeypatch, run_experiment, source_root, tmp_path_factory -): - monkeypatch.chdir(tmp_path) - test_files = _ensemble_experiment_run( - run_experiment, source_root, tmp_path_factory, True - ) - shutil.copytree(test_files, tmp_path, dirs_exist_ok=True) - with _open_main_window(tmp_path / "poly.ert") as ( - gui, - _, - config, - ), StorageService.init_service( - project=os.path.abspath(config.ens_path), - ): - yield gui @pytest.fixture -def ensemble_experiment_has_run_no_failure( - tmp_path, monkeypatch, run_experiment, source_root, tmp_path_factory -): - monkeypatch.chdir(tmp_path) - test_files = _ensemble_experiment_run( - run_experiment, source_root, tmp_path_factory, False - ) - shutil.copytree(test_files, tmp_path, dirs_exist_ok=True) - with _open_main_window(tmp_path / "poly.ert") as ( - gui, - _, - config, - ), StorageService.init_service( - project=os.path.abspath(config.ens_path), - ): - yield gui - - -@pytest.fixture(name="run_experiment", scope="module") -def run_experiment_fixture(request): - def func(experiment_mode, gui): - qtbot = QtBot(request) - with contextlib.suppress(FileNotFoundError): - shutil.rmtree("poly_out") - # Select correct experiment in the simulation panel - experiment_panel = gui.findChild(ExperimentPanel) - assert isinstance(experiment_panel, ExperimentPanel) - simulation_mode_combo = experiment_panel.findChild(QComboBox) - assert isinstance(simulation_mode_combo, QComboBox) - simulation_mode_combo.setCurrentText(experiment_mode.name()) - simulation_settings = experiment_panel._experiment_widgets[ - experiment_panel.get_current_experiment_type() - ] - if hasattr(simulation_settings, "_ensemble_name_field"): - simulation_settings._ensemble_name_field.setText("iter-0") - - # Click start simulation and agree to the message - run_experiment = experiment_panel.findChild(QWidget, name="run_experiment") - - def handle_dialog(): - QTimer.singleShot( - 500, - lambda: handle_run_path_dialog(gui, qtbot, delete_run_path=False), - ) - - if not experiment_mode.name() in ( - "Ensemble experiment", - "Evaluate ensemble", - ): - QTimer.singleShot(500, handle_dialog) - qtbot.mouseClick(run_experiment, Qt.LeftButton) - - # The Run dialog opens, click show details and wait until done appears - # then click it - qtbot.waitUntil(lambda: gui.findChild(RunDialog) is not None) - run_dialog = gui.findChild(RunDialog) - - qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=200000) - qtbot.waitUntil(lambda: run_dialog._tab_widget.currentWidget() is not None) - - # Assert that the number of boxes in the detailed view is - # equal to the number of realizations - realization_widget = run_dialog._tab_widget.currentWidget() - assert isinstance(realization_widget, RealizationWidget) - list_model = realization_widget._real_view.model() - assert ( - list_model.rowCount() - == experiment_panel.config.model_config.num_realizations - ) - - qtbot.mouseClick(run_dialog.done_button, Qt.LeftButton) - - return func - - -@pytest.fixture() def full_snapshot() -> EnsembleSnapshot: real = RealizationSnapshot( status=REALIZATION_STATE_RUNNING, @@ -383,7 +82,7 @@ def full_snapshot() -> EnsembleSnapshot: return snapshot -@pytest.fixture() +@pytest.fixture def fail_snapshot() -> EnsembleSnapshot: real = RealizationSnapshot( status=REALIZATION_STATE_FAILED, @@ -430,146 +129,3 @@ def large_snapshot() -> EnsembleSnapshot: ) real_ids = [str(i) for i in range(0, 150)] return 
builder.build(real_ids, REALIZATION_STATE_UNKNOWN) - - -@pytest.fixture() -def small_snapshot() -> EnsembleSnapshot: - builder = SnapshotBuilder() - for i in range(0, 2): - builder.add_fm_step( - fm_step_id=str(i), - index=str(i), - name=f"job_{i}", - current_memory_usage="500", - max_memory_usage="1000", - status=FORWARD_MODEL_STATE_START, - stdout=f"job_{i}.stdout", - stderr=f"job_{i}.stderr", - start_time=dt(1999, 1, 1), - end_time=dt(2019, 1, 1), - ) - real_ids = [str(i) for i in range(0, 5)] - return builder.build(real_ids, REALIZATION_STATE_UNKNOWN) - - -@pytest.fixture(name="active_realizations") -def active_realizations_fixture() -> Mock: - active_reals = MagicMock() - active_reals.count = Mock(return_value=10) - active_reals.__iter__.return_value = [True] * 10 - return active_reals - - -@pytest.fixture -def runmodel(active_realizations) -> Mock: - brm = Mock() - brm.get_runtime = Mock(return_value=100) - brm.format_error = Mock(return_value="") - brm.support_restart = True - brm.simulation_arguments = {"active_realizations": active_realizations} - brm.has_failed_realizations = lambda: False - return brm - - -class MockTracker: - def __init__(self, events) -> None: - self._events = events - self._is_running = True - - def track(self): - for event in self._events: - if not self._is_running: - break - time.sleep(0.1) - yield event - - def reset(self): - pass - - def request_termination(self): - self._is_running = False - - -@pytest.fixture -def mock_tracker(): - def _make_mock_tracker(events): - return MockTracker(events) - - return _make_mock_tracker - - -def load_results_manually(qtbot, gui, ensemble_name="default"): - def handle_load_results_dialog(): - dialog = wait_for_child(gui, qtbot, ClosableDialog) - panel = get_child(dialog, LoadResultsPanel) - - ensemble_selector = get_child(panel, EnsembleSelector) - index = ensemble_selector.findText(ensemble_name, Qt.MatchFlag.MatchContains) - assert index != -1 - ensemble_selector.setCurrentIndex(index) - - # click on "Load" - load_button = get_child(panel.parent(), QPushButton, name="Load") - - # Verify that the messagebox is the success kind - def handle_popup_dialog(): - messagebox = QApplication.activeModalWidget() - assert isinstance(messagebox, QMessageBox) - assert messagebox.text() == "Successfully loaded all realisations" - ok_button = messagebox.button(QMessageBox.Ok) - qtbot.mouseClick(ok_button, Qt.LeftButton) - - QTimer.singleShot(2000, handle_popup_dialog) - qtbot.mouseClick(load_button, Qt.LeftButton) - dialog.close() - - QTimer.singleShot(1000, handle_load_results_dialog) - load_results_tool = gui.tools["Load results manually"] - load_results_tool.trigger() - - -def add_experiment_manually( - qtbot, gui, experiment_name="My_experiment", ensemble_name="default" -): - manage_tool = gui.tools["Manage experiments"] - manage_tool.trigger() - - assert isinstance(manage_tool, ManageExperimentsTool) - experiments_panel = manage_tool._manage_experiments_panel - - # Open the create new experiment tab - experiments_panel.setCurrentIndex(0) - current_tab = experiments_panel.currentWidget() - assert current_tab.objectName() == "create_new_ensemble_tab" - storage_widget = get_child(current_tab, StorageWidget) - - def handle_add_dialog(): - dialog = wait_for_child(current_tab, qtbot, CreateExperimentDialog) - dialog._experiment_edit.setText(experiment_name) - dialog._ensemble_edit.setText(ensemble_name) - qtbot.mouseClick(dialog._ok_button, Qt.MouseButton.LeftButton) - - QTimer.singleShot(1000, handle_add_dialog) - add_widget = 
get_child(storage_widget, AddWidget) - qtbot.mouseClick(add_widget.addButton, Qt.MouseButton.LeftButton) - - experiments_panel.close() - - -V = TypeVar("V") - - -def wait_for_child(gui, qtbot: QtBot, typ: Type[V], *args, **kwargs) -> V: - qtbot.waitUntil(lambda: gui.findChild(typ, *args, **kwargs) is not None) - return get_child(gui, typ, *args, **kwargs) - - -def get_child(gui: QWidget, typ: Type[V], *args, **kwargs) -> V: - child = gui.findChild(typ, *args, **kwargs) - assert isinstance(child, typ) - return child - - -def get_children(gui: QWidget, typ: Type[V], *args, **kwargs) -> List[V]: - children: List[typ] = gui.findChildren(typ, *args, **kwargs) - return children diff --git a/tests/unit_tests/gui/ertwidgets/test_plot_case_selection_widget.py b/tests/unit_tests/gui/ertwidgets/test_plot_case_selection_widget.py index cd78b5bfdab..75611a2c234 100644 --- a/tests/unit_tests/gui/ertwidgets/test_plot_case_selection_widget.py +++ b/tests/unit_tests/gui/ertwidgets/test_plot_case_selection_widget.py @@ -6,8 +6,7 @@ EnsembleSelectionWidget, EnsembleSelectListWidget, ) - -from ..conftest import get_child +from tests.ui_tests.gui.conftest import get_child def test_ensemble_selection_widget_max_min_selection(qtbot: QtBot): diff --git a/tests/unit_tests/gui/simulation/test_evaluate_ensemble_panel.py b/tests/unit_tests/gui/simulation/test_evaluate_ensemble_panel.py index 5976c34ab2c..e6954250be5 100644 --- a/tests/unit_tests/gui/simulation/test_evaluate_ensemble_panel.py +++ b/tests/unit_tests/gui/simulation/test_evaluate_ensemble_panel.py @@ -6,7 +6,7 @@ from ert.gui.main import GUILogHandler from ert.gui.simulation.evaluate_ensemble_panel import EvaluateEnsemblePanel from ert.gui.simulation.experiment_panel import ExperimentPanel -from tests.unit_tests.gui.conftest import get_child +from tests.ui_tests.gui.conftest import get_child @pytest.mark.usefixtures("copy_poly_case") diff --git a/tests/unit_tests/gui/simulation/test_run_dialog.py b/tests/unit_tests/gui/simulation/test_run_dialog.py index 230502bf366..1dea3a17fba 100644 --- a/tests/unit_tests/gui/simulation/test_run_dialog.py +++ b/tests/unit_tests/gui/simulation/test_run_dialog.py @@ -28,10 +28,9 @@ from ert.services import StorageService from ert.storage import open_storage from tests import SnapshotBuilder +from tests.ui_tests.gui.conftest import wait_for_child from tests.unit_tests.gui.simulation.test_run_path_dialog import handle_run_path_dialog -from ..conftest import wait_for_child - @pytest.fixture def run_model(): diff --git a/tests/unit_tests/gui/simulation/view/test_legend.py b/tests/unit_tests/gui/simulation/view/test_legend.py index 604eeb9e9f5..ee3d2e88dc7 100644 --- a/tests/unit_tests/gui/simulation/view/test_legend.py +++ b/tests/unit_tests/gui/simulation/view/test_legend.py @@ -7,7 +7,7 @@ from ert.ensemble_evaluator.state import REAL_STATE_TO_COLOR from ert.gui.simulation.view import ProgressWidget -from tests.unit_tests.gui.conftest import get_child +from tests.ui_tests.gui.conftest import get_child @given( diff --git a/tests/unit_tests/gui/simulation/view/test_realization.py b/tests/unit_tests/gui/simulation/view/test_realization.py index 44ce864ae9a..049a1f79ae8 100644 --- a/tests/unit_tests/gui/simulation/view/test_realization.py +++ b/tests/unit_tests/gui/simulation/view/test_realization.py @@ -1,10 +1,41 @@ +from datetime import datetime as dt + +import pytest from qtpy import QtCore from qtpy.QtCore import QModelIndex, QSize from qtpy.QtWidgets import QStyledItemDelegate, QStyleOptionViewItem +from 
ert.ensemble_evaluator.snapshot import ( + EnsembleSnapshot, +) +from ert.ensemble_evaluator.state import ( + FORWARD_MODEL_STATE_START, + REALIZATION_STATE_UNKNOWN, +) from ert.gui.model.node import _Node from ert.gui.model.snapshot import SnapshotModel from ert.gui.simulation.view.realization import RealizationWidget +from tests import SnapshotBuilder + + +@pytest.fixture +def small_snapshot() -> EnsembleSnapshot: + builder = SnapshotBuilder() + for i in range(0, 2): + builder.add_fm_step( + fm_step_id=str(i), + index=str(i), + name=f"job_{i}", + current_memory_usage="500", + max_memory_usage="1000", + status=FORWARD_MODEL_STATE_START, + stdout=f"job_{i}.stdout", + stderr=f"job_{i}.stderr", + start_time=dt(1999, 1, 1), + end_time=dt(2019, 1, 1), + ) + real_ids = [str(i) for i in range(0, 5)] + return builder.build(real_ids, REALIZATION_STATE_UNKNOWN) class MockDelegate(QStyledItemDelegate): From 156e202efac9f112f189acbf53270e2fc31955ec Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 13:35:18 +0200 Subject: [PATCH 03/11] Merge integration_tests into unit_tests Also adjusted which tests have the integration_tests marker --- tests/__init__.py | 14 + tests/integration_tests/scheduler/conftest.py | 20 - .../scheduler/test_lsf_driver.py | 335 ---------------- .../scheduler/test_openpbs_driver.py | 69 ---- .../scheduler/test_slurm_driver.py | 185 --------- tests/integration_tests/share/__init__.py | 0 tests/integration_tests/status/__init__.py | 0 tests/performance_tests/test_analysis.py | 183 +++++++++ tests/performance_tests/test_read_summary.py | 23 ++ .../cli/test_local_driver.py} | 0 tests/ui_tests/gui/test_main_window.py | 98 +++++ .../gui}/test_manage_experiments_tool.py | 0 tests/unit_tests/analysis/test_es_update.py | 172 +------- .../parsing/test_observations_parser.py | 1 + tests/unit_tests/config/test_ert_config.py | 4 + tests/unit_tests/config/test_observations.py | 1 + .../config/test_parser_error_collection.py | 3 + tests/unit_tests/config/test_read_summary.py | 18 +- .../config/test_substitution_list.py | 2 + .../unit_tests/config/test_summary_config.py | 5 +- .../dark_storage/test_http_endpoints.py | 11 + .../test_ensemble_evaluator.py | 3 + .../test_ensemble_legacy.py | 2 + .../ensemble_evaluator/test_scheduler.py | 1 + .../test_event_reporter.py | 2 + .../forward_model_runner/test_job.py | 1 + tests/unit_tests/gui/model/test_snapshot.py | 2 + .../unit_tests/gui/plottery/test_histogram.py | 1 + .../gui/simulation/test_run_dialog.py | 67 +--- .../gui/simulation/test_run_path_dialog.py | 4 + .../resources}/__init__.py | 0 .../resources}/_import_from_location.py | 0 .../resources}/ecl_run_fail | 0 .../resources}/test_ecl_run_new_config.py | 12 +- .../resources}/test_ecl_versioning_config.py | 0 .../resources}/test_forward_models.py | 0 .../resources}/test_opm_flow.py | 2 + .../resources}/test_shell.py | 0 .../resources}/test_subprocess.py | 3 +- .../resources}/test_templating.py | 0 .../scheduler/__init__.py | 0 .../scheduler/bin/bhist | 0 .../scheduler/bin/bhist.py | 0 .../scheduler/bin/bjobs | 0 .../scheduler/bin/bjobs.py | 0 .../scheduler/bin/bkill | 0 .../scheduler/bin/bkill.py | 0 .../scheduler/bin/bsub | 0 .../scheduler/bin/lsfrunner | 0 .../scheduler/bin/qdel | 0 .../scheduler/bin/qstat | 0 .../scheduler/bin/qstat.py | 0 .../scheduler/bin/qsub | 0 .../scheduler/bin/runner | 0 .../scheduler/bin/sbatch | 0 .../scheduler/bin/sbatch.py | 0 .../scheduler/bin/scancel | 0 .../scheduler/bin/scontrol | 0 .../scheduler/bin/scontrol.py | 0 .../scheduler/bin/squeue 
| 0 .../scheduler/bin/squeue.py | 0 tests/unit_tests/scheduler/conftest.py | 18 + .../scheduler/test_generic_driver.py | 5 +- tests/unit_tests/scheduler/test_lsf_driver.py | 366 +++++++++++++++++- .../scheduler/test_openpbs_driver.py | 70 +++- .../unit_tests/scheduler/test_slurm_driver.py | 179 +++++++++ .../unit_tests/services/test_base_service.py | 5 + tests/unit_tests/shared/test_port_handler.py | 3 + tests/unit_tests/simulator/test_batch_sim.py | 3 + .../observations/FWPR | 0 .../observations/GEN | 0 .../test_that_storage_matches/parameters | 0 .../test_that_storage_matches/responses | 0 .../test_that_storage_matches/summary_data | 0 .../unit_tests/storage/test_local_storage.py | 2 +- .../storage}/test_storage_migration.py | 2 + .../test_tracking.py} | 0 .../workflow_runner/test_workflow_runner.py | 2 + 78 files changed, 1010 insertions(+), 889 deletions(-) delete mode 100644 tests/integration_tests/scheduler/conftest.py delete mode 100644 tests/integration_tests/scheduler/test_lsf_driver.py delete mode 100644 tests/integration_tests/scheduler/test_openpbs_driver.py delete mode 100644 tests/integration_tests/scheduler/test_slurm_driver.py delete mode 100644 tests/integration_tests/share/__init__.py delete mode 100644 tests/integration_tests/status/__init__.py create mode 100644 tests/performance_tests/test_analysis.py create mode 100644 tests/performance_tests/test_read_summary.py rename tests/{integration_tests/scheduler/test_integration_local_driver.py => ui_tests/cli/test_local_driver.py} (100%) rename tests/{unit_tests/gui/tools => ui_tests/gui}/test_manage_experiments_tool.py (100%) rename tests/{integration_tests => unit_tests/resources}/__init__.py (100%) rename tests/{integration_tests/share => unit_tests/resources}/_import_from_location.py (100%) rename tests/{integration_tests/share => unit_tests/resources}/ecl_run_fail (100%) rename tests/{integration_tests/share => unit_tests/resources}/test_ecl_run_new_config.py (95%) rename tests/{integration_tests/share => unit_tests/resources}/test_ecl_versioning_config.py (100%) rename tests/{integration_tests/share => unit_tests/resources}/test_forward_models.py (100%) rename tests/{integration_tests/share => unit_tests/resources}/test_opm_flow.py (99%) rename tests/{integration_tests/share => unit_tests/resources}/test_shell.py (100%) rename tests/{integration_tests/share => unit_tests/resources}/test_subprocess.py (97%) rename tests/{integration_tests/share => unit_tests/resources}/test_templating.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/__init__.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bhist (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bhist.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bjobs (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bjobs.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bkill (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bkill.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/bsub (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/lsfrunner (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/qdel (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/qstat (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/qstat.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/qsub (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/runner 
(100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/sbatch (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/sbatch.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/scancel (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/scontrol (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/scontrol.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/squeue (100%) rename tests/{integration_tests => unit_tests}/scheduler/bin/squeue.py (100%) rename tests/{integration_tests => unit_tests}/scheduler/test_generic_driver.py (98%) rename tests/{integration_tests => unit_tests/storage}/snapshots/test_storage_migration/test_that_storage_matches/observations/FWPR (100%) rename tests/{integration_tests => unit_tests/storage}/snapshots/test_storage_migration/test_that_storage_matches/observations/GEN (100%) rename tests/{integration_tests => unit_tests/storage}/snapshots/test_storage_migration/test_that_storage_matches/parameters (100%) rename tests/{integration_tests => unit_tests/storage}/snapshots/test_storage_migration/test_that_storage_matches/responses (100%) rename tests/{integration_tests => unit_tests/storage}/snapshots/test_storage_migration/test_that_storage_matches/summary_data (100%) rename tests/{integration_tests => unit_tests/storage}/test_storage_migration.py (99%) rename tests/{integration_tests/status/test_tracking_integration.py => unit_tests/test_tracking.py} (100%) diff --git a/tests/__init__.py b/tests/__init__.py index d7dd10f57ba..86b3081e6f2 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1,5 @@ +import importlib.util +import sys from copy import deepcopy from datetime import datetime from typing import Any, Dict, Optional, Sequence @@ -12,6 +14,18 @@ ) +def import_from_location(name, location): + spec = importlib.util.spec_from_file_location(name, location) + if spec is None: + raise ImportError(f"Could not find {name}") + module = importlib.util.module_from_spec(spec) + sys.modules[name] = module + if spec.loader is None: + raise ImportError(f"No loader for {name}") + spec.loader.exec_module(module) + return module + + class SnapshotBuilder(BaseModel): fm_steps: Dict[str, FMStepSnapshot] = {} metadata: Dict[str, Any] = {} diff --git a/tests/integration_tests/scheduler/conftest.py b/tests/integration_tests/scheduler/conftest.py deleted file mode 100644 index 1c6eacc75c0..00000000000 --- a/tests/integration_tests/scheduler/conftest.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import annotations - -import os -import sys -from pathlib import Path - -import pytest - - -def mock_bin(monkeypatch, tmp_path): - bin_path = Path(__file__).parent / "bin" - - monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") - monkeypatch.setenv("PYTEST_TMP_PATH", str(tmp_path)) - monkeypatch.setenv("PYTHON", sys.executable) - - -@pytest.fixture -def job_name(request) -> str: - return request.node.name.split("[")[0] diff --git a/tests/integration_tests/scheduler/test_lsf_driver.py b/tests/integration_tests/scheduler/test_lsf_driver.py deleted file mode 100644 index e13dc453ba6..00000000000 --- a/tests/integration_tests/scheduler/test_lsf_driver.py +++ /dev/null @@ -1,335 +0,0 @@ -import asyncio -import json -import logging -import os -import random -import re -import stat -import string -import sys -from pathlib import Path - -import pytest - -from ert.scheduler import LsfDriver -from ert.scheduler.driver import SIGNAL_OFFSET -from ert.scheduler.lsf_driver import 
LSF_FAILED_JOB -from tests.utils import poll - -from .conftest import mock_bin - - -@pytest.fixture(autouse=True) -def mock_lsf(pytestconfig, monkeypatch, tmp_path): - if pytestconfig.getoption("lsf"): - # User provided --lsf, which means we should use the actual LSF - # cluster without mocking anything."" - return - mock_bin(monkeypatch, tmp_path) - - -@pytest.fixture -def not_found_bjobs(monkeypatch, tmp_path): - """This creates a bjobs command that will always claim a job - does not exist, mimicking a job that has 'fallen out of the bjobs cache'.""" - os.chdir(tmp_path) - bin_path = tmp_path / "bin" - bin_path.mkdir() - monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") - bjobs_path = bin_path / "bjobs" - bjobs_path.write_text( - "#!/bin/sh\n" 'echo "Job <$1> is not found"', - encoding="utf-8", - ) - bjobs_path.chmod(bjobs_path.stat().st_mode | stat.S_IEXEC) - - -async def test_lsf_stdout_file(tmp_path, job_name): - os.chdir(tmp_path) - driver = LsfDriver() - await driver.submit(0, "sh", "-c", "echo yay", name=job_name) - await poll(driver, {0}) - lsf_stdout = Path(f"{job_name}.LSF-stdout").read_text(encoding="utf-8") - assert Path( - f"{job_name}.LSF-stdout" - ).exists(), "LSF system did not write output file" - - assert "Sender: " in lsf_stdout, "LSF stdout should always start with 'Sender:'" - assert "The output (if any) follows:" in lsf_stdout - assert "yay" in lsf_stdout - - -async def test_lsf_dumps_stderr_to_file(tmp_path, job_name): - os.chdir(tmp_path) - driver = LsfDriver() - failure_message = "failURE" - await driver.submit(0, "sh", "-c", f"echo {failure_message} >&2", name=job_name) - await poll(driver, {0}) - assert Path( - f"{job_name}.LSF-stderr" - ).exists(), "LSF system did not write stderr file" - - assert ( - Path(f"{job_name}.LSF-stderr").read_text(encoding="utf-8").strip() - == failure_message - ) - - -def generate_random_text(size): - letters = string.ascii_letters - return "".join(random.choice(letters) for i in range(size)) - - -@pytest.mark.parametrize("tail_chars_to_read", [(5), (50), (500), (700)]) -async def test_lsf_can_retrieve_stdout_and_stderr( - tmp_path, job_name, tail_chars_to_read -): - os.chdir(tmp_path) - driver = LsfDriver() - num_written_characters = 600 - _out = generate_random_text(num_written_characters) - _err = generate_random_text(num_written_characters) - await driver.submit(0, "sh", "-c", f"echo {_out} && echo {_err} >&2", name=job_name) - await poll(driver, {0}) - message = driver.read_stdout_and_stderr_files( - runpath=".", - job_name=job_name, - num_characters_to_read_from_end=tail_chars_to_read, - ) - - stderr_txt = Path(f"{job_name}.LSF-stderr").read_text(encoding="utf-8").strip() - stdout_txt = Path(f"{job_name}.LSF-stdout").read_text(encoding="utf-8").strip() - - assert stderr_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message - assert stdout_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message - - -async def test_lsf_cannot_retrieve_stdout_and_stderr(tmp_path, job_name): - os.chdir(tmp_path) - driver = LsfDriver() - num_written_characters = 600 - _out = generate_random_text(num_written_characters) - _err = generate_random_text(num_written_characters) - await driver.submit(0, "sh", "-c", f"echo {_out} && echo {_err} >&2", name=job_name) - await poll(driver, {0}) - # let's remove the output files - os.remove(job_name + ".LSF-stderr") - os.remove(job_name + ".LSF-stdout") - message = driver.read_stdout_and_stderr_files( - runpath=".", - job_name=job_name, - 
num_characters_to_read_from_end=1, - ) - assert "LSF-stderr:\nNo output file" in message - assert "LSF-stdout:\nNo output file" in message - - -@pytest.mark.parametrize("explicit_runpath", [(True), (False)]) -async def test_lsf_info_file_in_runpath(explicit_runpath, tmp_path, job_name): - os.chdir(tmp_path) - driver = LsfDriver() - (tmp_path / "some_runpath").mkdir() - os.chdir(tmp_path) - effective_runpath = tmp_path / "some_runpath" if explicit_runpath else tmp_path - await driver.submit( - 0, - "sh", - "-c", - "exit 0", - runpath=tmp_path / "some_runpath" if explicit_runpath else None, - name=job_name, - ) - - await poll(driver, {0}) - - effective_runpath = tmp_path / "some_runpath" if explicit_runpath else tmp_path - assert json.loads( - (effective_runpath / "lsf_info.json").read_text(encoding="utf-8") - ).keys() == {"job_id"} - - -@pytest.mark.integration_test -async def test_submit_to_named_queue(tmp_path, caplog, job_name): - """If the environment variable _ERT_TEST_ALTERNATIVE_QUEUE is defined - a job will be attempted submitted to that queue. - - As Ert does not keep track of which queue a job is executed in, we can only - test for success for the job.""" - os.chdir(tmp_path) - driver = LsfDriver(queue_name=os.getenv("_ERT_TESTS_ALTERNATIVE_QUEUE")) - await driver.submit(0, "sh", "-c", f"echo test > {tmp_path}/test", name=job_name) - await poll(driver, {0}) - - assert (tmp_path / "test").read_text(encoding="utf-8") == "test\n" - - -@pytest.mark.usefixtures("use_tmpdir") -async def test_submit_with_resource_requirement(job_name): - resource_requirement = "select[cs && x86_64Linux]" - driver = LsfDriver(resource_requirement=resource_requirement) - await driver.submit(0, "sh", "-c", "echo test>test", name=job_name) - await poll(driver, {0}) - - assert Path("test").read_text(encoding="utf-8") == "test\n" - - -@pytest.mark.usefixtures("use_tmpdir") -async def test_submit_with_num_cpu(pytestconfig, job_name): - if not pytestconfig.getoption("lsf"): - return - - num_cpu = 2 - driver = LsfDriver() - await driver.submit(0, "sh", "-c", "echo test>test", name=job_name, num_cpu=num_cpu) - job_id = driver._iens2jobid[0] - await poll(driver, {0}) - - process = await asyncio.create_subprocess_exec( - "bhist", - "-l", - job_id, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await process.communicate() - stdout_no_whitespaces = re.sub(r"\s+", "", stdout.decode()) - matches = re.search(r".*([0-9]+)ProcessorsRequested.*", stdout_no_whitespaces) - assert matches and matches[1] == str( - num_cpu - ), f"Could not verify processor allocation from stdout: {stdout}, stderr: {stderr}" - - assert Path("test").read_text(encoding="utf-8") == "test\n" - - -@pytest.mark.usefixtures("use_tmpdir") -async def test_submit_with_realization_memory(pytestconfig, job_name): - if not pytestconfig.getoption("lsf"): - pytest.skip("Mocked LSF driver does not provide bhist") - - realization_memory_bytes = 1024 * 1024 - driver = LsfDriver() - await driver.submit( - 0, - "sh", - "-c", - "echo test>test", - name=job_name, - realization_memory=realization_memory_bytes, - ) - job_id = driver._iens2jobid[0] - await poll(driver, {0}) - - process = await asyncio.create_subprocess_exec( - "bhist", - "-l", - job_id, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, _ = await process.communicate() - assert "rusage[mem=1]" in stdout.decode(encoding="utf-8") - - assert Path("test").read_text(encoding="utf-8") == "test\n" - - -async def 
test_polling_bhist_fallback(not_found_bjobs, caplog, job_name): - caplog.set_level(logging.DEBUG) - driver = LsfDriver() - Path("mock_jobs").mkdir() - Path("mock_jobs/pendingtimemillis").write_text("100", encoding="utf-8") - driver._poll_period = 0.01 - - bhist_called = False - original_bhist_method = driver._poll_once_by_bhist - - def mock_poll_once_by_bhist(*args, **kwargs): - nonlocal bhist_called - bhist_called = True - return original_bhist_method(*args, **kwargs) - - driver._poll_once_by_bhist = mock_poll_once_by_bhist - - await driver.submit(0, "sh", "-c", "sleep 1", name=job_name) - job_id = list(driver._iens2jobid.values())[0] - await poll(driver, {0}) - assert "bhist is used" in caplog.text - assert bhist_called - assert driver._bhist_cache and job_id in driver._bhist_cache - - -@pytest.mark.flaky(rerun=10) -async def test_that_kill_before_submit_is_finished_works( - tmp_path, monkeypatch, caplog, pytestconfig -): - """This test asserts that it is possible to issue a kill command - to a realization right after it has been submitted (as in driver.submit()). - - The bug intended to catch is if the driver gives up on killing before submission - is not done yet, it is important not to let the realization through in that scenario. - - The design of the test alludes to much more flakyness than what is probable in reality, - thus reruns are allowed to make this pass. - """ - os.chdir(tmp_path) - - if pytestconfig.getoption("lsf"): - # Allow more time when tested on a real compute cluster to avoid false positives. - job_kill_window = 10 - test_grace_time = 20 - elif sys.platform.startswith("darwin"): - # Mitigate flakiness on low-power test nodes - job_kill_window = 5 - test_grace_time = 10 - else: - job_kill_window = 2 - test_grace_time = 4 - - bin_path = tmp_path / "bin" - bin_path.mkdir() - monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") - bsub_path = bin_path / "slow_bsub" - bsub_path.write_text( - "#!/bin/sh\nsleep 0.1\nbsub $@", - encoding="utf-8", - ) - bsub_path.chmod(bsub_path.stat().st_mode | stat.S_IEXEC) - - caplog.set_level(logging.DEBUG) - driver = LsfDriver(bsub_cmd="slow_bsub") - - # Allow submit and kill to be interleaved by asyncio by issuing - # submit() in its own asyncio Task: - asyncio.create_task( - driver.submit( - # The sleep is the time window in which we can kill the job before - # the unwanted finish message appears on disk. - 0, - "sh", - "-c", - f"sleep {job_kill_window}; touch {tmp_path}/survived", - ) - ) - await asyncio.sleep(0.01) # Allow submit task to start executing - await driver.kill(0) # This will wait until the submit is done and then kill - - async def finished(iens: int, returncode: int): - SIGTERM = 15 - assert iens == 0 - # If the kill is issued before the job really starts, you will not - # get SIGTERM but rather LSF_FAILED_JOB. Whether SIGNAL_OFFSET is - # added or not depends on various shell configurations and is a - # detail we do not want to track. - assert returncode in (SIGTERM, SIGNAL_OFFSET + SIGTERM, LSF_FAILED_JOB) - - await poll(driver, {0}, finished=finished) - assert "ERROR" not in str(caplog.text) - - # In case the return value of the killed job is correct but the submitted - # shell script is still running for whatever reason, a file called - # "survived" will appear on disk. Wait for it, and then ensure it is not - # there. 
- assert test_grace_time > job_kill_window, "Wrong test setup" - await asyncio.sleep(test_grace_time) - assert not Path( - "survived" - ).exists(), "The process children of the job should also have been killed" diff --git a/tests/integration_tests/scheduler/test_openpbs_driver.py b/tests/integration_tests/scheduler/test_openpbs_driver.py deleted file mode 100644 index a6f987b4d75..00000000000 --- a/tests/integration_tests/scheduler/test_openpbs_driver.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -from functools import partial - -import pytest - -from ert.cli.main import ErtCliError -from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE -from ert.scheduler.openpbs_driver import OpenPBSDriver -from tests.ui_tests.cli.run_cli import run_cli - -from .conftest import mock_bin - - -@pytest.fixture(autouse=True) -def mock_openpbs(pytestconfig, monkeypatch, tmp_path): - if pytestconfig.getoption("openpbs"): - # User provided --openpbs, which means we should use the actual OpenPBS - # cluster without mocking anything. - return - mock_bin(monkeypatch, tmp_path) - - -@pytest.fixture() -def queue_name_config(): - if queue_name := os.getenv("_ERT_TESTS_DEFAULT_QUEUE_NAME"): - return f"\nQUEUE_OPTION TORQUE QUEUE {queue_name}" - return "" - - -async def mock_failure(message, *args, **kwargs): - raise RuntimeError(message) - - -@pytest.mark.integration_test -@pytest.mark.usefixtures("copy_poly_case") -def test_openpbs_driver_with_poly_example_failing_submit_fails_ert_and_propagates_exception_to_user( - monkeypatch, caplog, queue_name_config -): - monkeypatch.setattr( - OpenPBSDriver, "submit", partial(mock_failure, "Submit job failed") - ) - with open("poly.ert", mode="a+", encoding="utf-8") as f: - f.write("QUEUE_SYSTEM TORQUE\nNUM_REALIZATIONS 2") - f.write(queue_name_config) - with pytest.raises(ErtCliError): - run_cli( - ENSEMBLE_EXPERIMENT_MODE, - "poly.ert", - ) - assert "RuntimeError: Submit job failed" in caplog.text - - -@pytest.mark.integration_test -@pytest.mark.usefixtures("copy_poly_case") -def test_openpbs_driver_with_poly_example_failing_poll_fails_ert_and_propagates_exception_to_user( - monkeypatch, caplog, queue_name_config -): - monkeypatch.setattr( - OpenPBSDriver, "poll", partial(mock_failure, "Status polling failed") - ) - with open("poly.ert", mode="a+", encoding="utf-8") as f: - f.write("QUEUE_SYSTEM TORQUE\nNUM_REALIZATIONS 2") - f.write(queue_name_config) - with pytest.raises(ErtCliError): - run_cli( - ENSEMBLE_EXPERIMENT_MODE, - "poly.ert", - ) - assert "RuntimeError: Status polling failed" in caplog.text diff --git a/tests/integration_tests/scheduler/test_slurm_driver.py b/tests/integration_tests/scheduler/test_slurm_driver.py deleted file mode 100644 index 4ab7aed2c63..00000000000 --- a/tests/integration_tests/scheduler/test_slurm_driver.py +++ /dev/null @@ -1,185 +0,0 @@ -import asyncio -import logging -import os -import random -import stat -import string -import sys -from pathlib import Path - -import pytest - -from ert.scheduler import SlurmDriver -from tests.utils import poll - -from .conftest import mock_bin - - -@pytest.fixture(autouse=True) -def mock_slurm(pytestconfig, monkeypatch, tmp_path): - if pytestconfig.getoption("slurm"): - # User provided --slurm, which means we should use an actual Slurm - # cluster without mocking anything."" - return - mock_bin(monkeypatch, tmp_path) - - -async def test_slurm_stdout_file(tmp_path, job_name): - os.chdir(tmp_path) - driver = SlurmDriver() - await driver.submit(0, "sh", "-c", "echo yay", name=job_name) - await poll(driver, 
{0}) - slurm_stdout = Path(f"{job_name}.stdout").read_text(encoding="utf-8") - assert Path(f"{job_name}.stdout").exists(), "Slurm system did not write output file" - assert "yay" in slurm_stdout - - -async def test_slurm_dumps_stderr_to_file(tmp_path, job_name): - os.chdir(tmp_path) - driver = SlurmDriver() - failure_message = "failURE" - await driver.submit(0, "sh", "-c", f"echo {failure_message} >&2", name=job_name) - await poll(driver, {0}) - assert Path(f"{job_name}.stderr").exists(), "Slurm system did not write stderr file" - - assert ( - Path(f"{job_name}.stderr").read_text(encoding="utf-8").strip() - == failure_message - ) - - -def generate_random_text(size): - letters = string.ascii_letters - return "".join(random.choice(letters) for _ in range(size)) - - -@pytest.mark.parametrize("tail_chars_to_read", [(5), (50), (500), (700)]) -async def test_slurm_can_retrieve_stdout_and_stderr( - tmp_path, job_name, tail_chars_to_read -): - os.chdir(tmp_path) - driver = SlurmDriver() - num_written_characters = 600 - _out = generate_random_text(num_written_characters) - _err = generate_random_text(num_written_characters) - await driver.submit(0, "sh", "-c", f"echo {_out} && echo {_err} >&2", name=job_name) - await poll(driver, {0}) - message = driver.read_stdout_and_stderr_files( - runpath=".", - job_name=job_name, - num_characters_to_read_from_end=tail_chars_to_read, - ) - - stderr_txt = Path(f"{job_name}.stderr").read_text(encoding="utf-8").strip() - stdout_txt = Path(f"{job_name}.stdout").read_text(encoding="utf-8").strip() - - assert stderr_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message - assert stdout_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message - - -@pytest.mark.integration_test -async def test_submit_to_named_queue(tmp_path, job_name): - """If the environment variable _ERT_TEST_ALTERNATIVE_QUEUE is defined - a job will be attempted submitted to that queue. - - * Note that what is called a "queue" in Ert is a "partition" in Slurm lingo. - - As Ert does not keep track of which queue a job is executed in, we can only - test for success for the job.""" - os.chdir(tmp_path) - driver = SlurmDriver(queue_name=os.getenv("_ERT_TESTS_ALTERNATIVE_QUEUE")) - await driver.submit(0, "sh", "-c", f"echo test > {tmp_path}/test", name=job_name) - await poll(driver, {0}) - - assert (tmp_path / "test").read_text(encoding="utf-8") == "test\n" - - -@pytest.mark.usefixtures("use_tmpdir") -async def test_submit_with_num_cpu(pytestconfig, job_name): - if not pytestconfig.getoption("slurm"): - return - - num_cpu = 2 - driver = SlurmDriver() - await driver.submit(0, "sh", "-c", "echo test>test", name=job_name, num_cpu=num_cpu) - job_id = driver._iens2jobid[0] - await poll(driver, {0}) - - process = await asyncio.create_subprocess_exec( - "scontrol", - "show", - "job", - job_id, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await process.communicate() - assert " NumCPUs=2 " in stdout.decode( - errors="ignore" - ), f"Could not verify processor allocation from stdout: {stdout}, stderr: {stderr}" - - assert Path("test").read_text(encoding="utf-8") == "test\n" - - -@pytest.mark.flaky(reruns=3) -async def test_kill_before_submit_is_finished( - tmp_path, monkeypatch, caplog, pytestconfig -): - os.chdir(tmp_path) - - if pytestconfig.getoption("slurm"): - # Allow more time when tested on a real compute cluster to avoid false positives. 
- job_kill_window = 5 - test_grace_time = 10 - elif sys.platform.startswith("darwin"): - # Mitigate flakiness on low-power test nodes - job_kill_window = 5 - test_grace_time = 10 - else: - job_kill_window = 1 - test_grace_time = 2 - - bin_path = tmp_path / "bin" - bin_path.mkdir() - monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") - sbatch_path = bin_path / "slow_sbatch" - sbatch_path.write_text( - "#!/bin/sh\nsleep 0.1\nsbatch $@", - encoding="utf-8", - ) - sbatch_path.chmod(sbatch_path.stat().st_mode | stat.S_IEXEC) - - caplog.set_level(logging.DEBUG) - driver = SlurmDriver(sbatch_cmd="slow_sbatch") - - # Allow submit and kill to be interleaved by asyncio by issuing - # submit() in its own asyncio Task: - asyncio.create_task( - driver.submit( - # The sleep is the time window in which we can kill the job before - # the unwanted finish message appears on disk. - 0, - "sh", - "-c", - f"sleep {job_kill_window}; touch {tmp_path}/survived", - ) - ) - await asyncio.sleep(0.01) # Allow submit task to start executing - await driver.kill(0) # This will wait until the submit is done and then kill - - async def finished(iens: int, returncode: int): - assert iens == 0 - # Slurm assigns returncode 0 even when they are killed. - assert returncode == 0 - - await poll(driver, {0}, finished=finished) - - # In case the return value of the killed job is correct but the submitted - # shell script is still running for whatever reason, a file called - # "survived" will appear on disk. Wait for it, and then ensure it is not - # there. - assert test_grace_time > job_kill_window, "Wrong test setup" - await asyncio.sleep(test_grace_time) - assert not Path( - "survived" - ).exists(), "The process children of the job should also have been killed" diff --git a/tests/integration_tests/share/__init__.py b/tests/integration_tests/share/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration_tests/status/__init__.py b/tests/integration_tests/status/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/performance_tests/test_analysis.py b/tests/performance_tests/test_analysis.py new file mode 100644 index 00000000000..9945178840d --- /dev/null +++ b/tests/performance_tests/test_analysis.py @@ -0,0 +1,183 @@ +from functools import partial + +import numpy as np +import scipy as sp +import xarray as xr +import xtgeo +from scipy.ndimage import gaussian_filter + +from ert.analysis import ( + smoother_update, +) +from ert.config import Field, GenDataConfig +from ert.config.analysis_config import UpdateSettings +from ert.config.analysis_module import ESSettings +from ert.field_utils import Shape + + +def test_and_benchmark_adaptive_localization_with_fields( + storage, tmp_path, monkeypatch, benchmark +): + monkeypatch.chdir(tmp_path) + + rng = np.random.default_rng(42) + + num_grid_cells = 1000 + num_parameters = num_grid_cells * num_grid_cells + num_observations = 50 + num_ensemble = 25 + + # Create a tridiagonal matrix that maps responses to parameters. + # Being tridiagonal, it ensures that each response is influenced only by its neighboring parameters. 
+ diagonal = np.ones(min(num_parameters, num_observations)) + A = sp.sparse.diags( + [diagonal, diagonal, diagonal], + offsets=[-1, 0, 1], + shape=(num_observations, num_parameters), + dtype=float, + ).toarray() + + # We add some noise that is insignificant compared to the + # actual local structure in the forward model step + A += rng.standard_normal(size=A.shape) * 0.01 + + def g(X): + """Apply the forward model.""" + return A @ X + + all_realizations = np.zeros((num_ensemble, num_grid_cells, num_grid_cells, 1)) + + # Generate num_ensemble realizations of the Gaussian Random Field + for i in range(num_ensemble): + sigma = 10 + realization = np.exp( + gaussian_filter( + gaussian_filter( + rng.standard_normal((num_grid_cells, num_grid_cells)), sigma=sigma + ), + sigma=sigma, + ) + ) + + realization = realization[..., np.newaxis] + all_realizations[i] = realization + + X = all_realizations.reshape(-1, num_grid_cells * num_grid_cells).T + + Y = g(X) + + # Create observations by adding noise to a realization. + observation_noise = rng.standard_normal(size=num_observations) + observations = Y[:, 0] + observation_noise + + # Create necessary files and data sets to be able to update + # the parameters using the ensemble smoother. + shape = Shape(num_grid_cells, num_grid_cells, 1) + grid = xtgeo.create_box_grid(dimension=(shape.nx, shape.ny, shape.nz)) + grid.to_file("MY_EGRID.EGRID", "egrid") + + resp = GenDataConfig(keys=["RESPONSE"]) + obs = xr.Dataset( + { + "observations": ( + ["report_step", "index"], + observations.reshape((1, num_observations)), + ), + "std": ( + ["report_step", "index"], + observation_noise.reshape(1, num_observations), + ), + }, + coords={"report_step": [0], "index": np.arange(len(observations))}, + attrs={"response": "RESPONSE"}, + ) + + param_group = "PARAM_FIELD" + + config = Field.from_config_list( + "MY_EGRID.EGRID", + shape, + [ + param_group, + param_group, + "param.GRDECL", + "INIT_FILES:param_%d.GRDECL", + "FORWARD_INIT:False", + ], + ) + + experiment = storage.create_experiment( + parameters=[config], + responses=[resp], + observations={"OBSERVATION": obs}, + ) + + prior_ensemble = storage.create_ensemble( + experiment, + ensemble_size=num_ensemble, + iteration=0, + name="prior", + ) + + for iens in range(prior_ensemble.ensemble_size): + prior_ensemble.save_parameters( + param_group, + iens, + xr.Dataset( + { + "values": xr.DataArray( + X[:, iens].reshape(num_grid_cells, num_grid_cells, 1), + dims=("x", "y", "z"), + ), + } + ), + ) + + prior_ensemble.save_response( + "gen_data", + xr.Dataset( + {"values": (["name", "report_step", "index"], [[Y[:, iens]]])}, + coords={ + "name": ["RESPONSE"], + "index": range(len(Y[:, iens])), + "report_step": [0], + }, + ), + iens, + ) + + posterior_ensemble = storage.create_ensemble( + prior_ensemble.experiment_id, + ensemble_size=prior_ensemble.ensemble_size, + iteration=1, + name="posterior", + prior_ensemble=prior_ensemble, + ) + + smoother_update_run = partial( + smoother_update, + prior_ensemble, + posterior_ensemble, + ["OBSERVATION"], + [param_group], + UpdateSettings(), + ESSettings(localization=True), + ) + benchmark(smoother_update_run) + + prior_da = prior_ensemble.load_parameters(param_group, range(num_ensemble))[ + "values" + ] + posterior_da = posterior_ensemble.load_parameters(param_group, range(num_ensemble))[ + "values" + ] + # Make sure some, but not all parameters were updated. + assert not np.allclose(prior_da, posterior_da) + # All parameters would be updated with a global update so this would fail. 
+ assert np.isclose(prior_da, posterior_da).sum() > 0 + # The std for the ensemble should decrease + assert float( + prior_ensemble.calculate_std_dev_for_parameter(param_group)["values"].sum() + ) > float( + posterior_ensemble.calculate_std_dev_for_parameter(param_group)["values"].sum() + ) diff --git a/tests/performance_tests/test_read_summary.py b/tests/performance_tests/test_read_summary.py new file mode 100644 index 00000000000..a01535349f5 --- /dev/null +++ b/tests/performance_tests/test_read_summary.py @@ -0,0 +1,23 @@ +from hypothesis import given + +from ert.config._read_summary import read_summary +from tests.unit_tests.config.summary_generator import ( + summaries, +) + + +@given(summaries()) +def test_that_length_of_fetch_keys_does_not_reduce_performance( + tmp_path_factory, summary +): + """With a compiled regex this takes seconds to run, and with + a naive implementation it will take almost an hour. + """ + tmp_path = tmp_path_factory.mktemp("summary") + smspec, unsmry = summary + unsmry.to_file(tmp_path / "TEST.UNSMRY") + smspec.to_file(tmp_path / "TEST.SMSPEC") + fetch_keys = [str(i) for i in range(100000)] + (_, keys, time_map, _) = read_summary(str(tmp_path / "TEST"), fetch_keys) + assert all(k in fetch_keys for k in keys) + assert len(time_map) == len(unsmry.steps) diff --git a/tests/integration_tests/scheduler/test_integration_local_driver.py b/tests/ui_tests/cli/test_local_driver.py similarity index 100% rename from tests/integration_tests/scheduler/test_integration_local_driver.py rename to tests/ui_tests/cli/test_local_driver.py diff --git a/tests/ui_tests/gui/test_main_window.py b/tests/ui_tests/gui/test_main_window.py index 5ed85d787b1..ef4c593eb51 100644 --- a/tests/ui_tests/gui/test_main_window.py +++ b/tests/ui_tests/gui/test_main_window.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from pytestqt.qtbot import QtBot from qtpy.QtCore import Qt, QTimer from qtpy.QtWidgets import ( QAction, @@ -15,6 +16,7 @@ QComboBox, QDoubleSpinBox, QMenuBar, + QMessageBox, QPushButton, QToolButton, QTreeView, @@ -32,9 +34,11 @@ from ert.gui.main import ErtMainWindow, GUILogHandler, _setup_main_window from ert.gui.simulation.experiment_panel import ExperimentPanel from ert.gui.simulation.run_dialog import RunDialog +from ert.gui.simulation.view import RealizationWidget from ert.gui.suggestor import Suggestor from ert.gui.suggestor._suggestor_message import SuggestorMessage from ert.gui.tools.event_viewer import add_gui_log_handler +from ert.gui.tools.file.file_dialog import FileDialog from ert.gui.tools.manage_experiments import ManageExperimentsTool from ert.gui.tools.manage_experiments.storage_widget import AddWidget, StorageWidget from ert.gui.tools.plot.data_type_keys_widget import DataTypeKeysWidget @@ -51,6 +55,7 @@ SingleTestRun, ) from ert.services import StorageService +from ert.storage import open_storage from .conftest import ( add_experiment_manually, @@ -217,6 +222,99 @@ def test_that_run_dialog_can_be_closed_after_used_to_open_plots(qtbot, storage): plot_window._central_tab.setCurrentWidget(tab) +def test_that_run_dialog_can_be_closed_while_file_plot_is_open( + snake_oil_case_storage: ErtConfig, qtbot: QtBot +): + """ + This is a regression test for a crash happening when + closing the RunDialog with a file open. 
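+
+    A realization is selected in the run dialog, a forward model step file is
+    opened from the step overview, and the dialog is then closed; the close
+    must not crash and the run button must be enabled again afterwards.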
+ """ + + snake_oil_case = snake_oil_case_storage + args_mock = Mock() + args_mock.config = "snake_oil.ert" + + def handle_run_path_error_dialog(gui: ErtMainWindow, qtbot: QtBot): + mb = gui.findChild(QMessageBox, "RUN_PATH_ERROR_BOX") + + if mb is not None: + assert mb + assert isinstance(mb, QMessageBox) + # Continue without deleting the runpath + qtbot.mouseClick(mb.buttons()[0], Qt.LeftButton) + + def handle_run_path_dialog( + gui: ErtMainWindow, + qtbot: QtBot, + delete_run_path: bool = True, + expect_error: bool = False, + ): + mb = gui.findChild(QMessageBox, "RUN_PATH_WARNING_BOX") + + if mb is not None: + assert mb + assert isinstance(mb, QMessageBox) + + if delete_run_path: + qtbot.mouseClick(mb.checkBox(), Qt.LeftButton) + + qtbot.mouseClick(mb.buttons()[0], Qt.LeftButton) + if expect_error: + QTimer.singleShot( + 1000, lambda: handle_run_path_error_dialog(gui, qtbot) + ) + + with StorageService.init_service( + project=os.path.abspath(snake_oil_case.ens_path), + ), open_storage(snake_oil_case.ens_path, mode="w") as storage: + gui = _setup_main_window(snake_oil_case, args_mock, GUILogHandler(), storage) + experiment_panel = gui.findChild(ExperimentPanel) + + run_experiment = experiment_panel.findChild(QWidget, name="run_experiment") + assert run_experiment + assert isinstance(run_experiment, QToolButton) + + QTimer.singleShot( + 1000, lambda: handle_run_path_dialog(gui, qtbot, delete_run_path=True) + ) + qtbot.mouseClick(run_experiment, Qt.LeftButton) + + qtbot.waitUntil(lambda: gui.findChild(RunDialog) is not None, timeout=5000) + run_dialog = gui.findChild(RunDialog) + qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=100000) + fm_step_overview = run_dialog._fm_step_overview + + qtbot.waitUntil(fm_step_overview.isVisible, timeout=20000) + qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=200000) + + realization_widget = run_dialog.findChild(RealizationWidget) + + click_pos = realization_widget._real_view.rectForIndex( + realization_widget._real_list_model.index(0, 0) + ).center() + + with qtbot.waitSignal(realization_widget.itemClicked, timeout=30000): + qtbot.mouseClick( + realization_widget._real_view.viewport(), + Qt.LeftButton, + pos=click_pos, + ) + + click_pos = fm_step_overview.visualRect( + fm_step_overview.model().index(0, 4) + ).center() + qtbot.mouseClick(fm_step_overview.viewport(), Qt.LeftButton, pos=click_pos) + + qtbot.waitUntil(run_dialog.findChild(FileDialog).isVisible, timeout=30000) + + with qtbot.waitSignal(run_dialog.accepted, timeout=30000): + run_dialog.close() # Close the run dialog by pressing 'x' close button + + # Ensure that once the run dialog is closed + # another simulation can be started + assert run_experiment.isEnabled() + + @pytest.mark.usefixtures("set_site_config") def test_help_buttons_in_suggester_dialog(tmp_path, qtbot): """ diff --git a/tests/unit_tests/gui/tools/test_manage_experiments_tool.py b/tests/ui_tests/gui/test_manage_experiments_tool.py similarity index 100% rename from tests/unit_tests/gui/tools/test_manage_experiments_tool.py rename to tests/ui_tests/gui/test_manage_experiments_tool.py diff --git a/tests/unit_tests/analysis/test_es_update.py b/tests/unit_tests/analysis/test_es_update.py index 7cfdf14874f..fe59b76849e 100644 --- a/tests/unit_tests/analysis/test_es_update.py +++ b/tests/unit_tests/analysis/test_es_update.py @@ -1,17 +1,14 @@ import functools import re from contextlib import ExitStack as does_not_raise -from functools import partial from pathlib import Path from unittest.mock import patch import 
numpy as np import pytest -import scipy as sp import xarray as xr import xtgeo from iterative_ensemble_smoother import steplength_exponential -from scipy.ndimage import gaussian_filter from tabulate import tabulate from ert.analysis import ( @@ -69,6 +66,7 @@ def remove_timestamp_from_logfile(log_file: Path): fout.write(buf) +@pytest.mark.integration_test @pytest.mark.flaky(reruns=5) @pytest.mark.parametrize( "misfit_preprocess", [[["*"]], [], [["FOPR"]], [["FOPR"], ["WOPR_OP1_1*"]]] @@ -450,174 +448,6 @@ def test_smoother_snapshot_alpha( ] == expected -def test_and_benchmark_adaptive_localization_with_fields( - storage, tmp_path, monkeypatch, benchmark -): - monkeypatch.chdir(tmp_path) - - rng = np.random.default_rng(42) - - num_grid_cells = 1000 - num_parameters = num_grid_cells * num_grid_cells - num_observations = 50 - num_ensemble = 25 - - # Create a tridiagonal matrix that maps responses to parameters. - # Being tridiagonal, it ensures that each response is influenced only by its neighboring parameters. - diagonal = np.ones(min(num_parameters, num_observations)) - A = sp.sparse.diags( - [diagonal, diagonal, diagonal], - offsets=[-1, 0, 1], - shape=(num_observations, num_parameters), - dtype=float, - ).toarray() - - # We add some noise that is insignificant compared to the - # actual local structure in the forward model step - A += rng.standard_normal(size=A.shape) * 0.01 - - def g(X): - """Apply the forward model.""" - return A @ X - - all_realizations = np.zeros((num_ensemble, num_grid_cells, num_grid_cells, 1)) - - # Generate num_ensemble realizations of the Gaussian Random Field - for i in range(num_ensemble): - sigma = 10 - realization = np.exp( - gaussian_filter( - gaussian_filter( - rng.standard_normal((num_grid_cells, num_grid_cells)), sigma=sigma - ), - sigma=sigma, - ) - ) - - realization = realization[..., np.newaxis] - all_realizations[i] = realization - - X = all_realizations.reshape(-1, num_grid_cells * num_grid_cells).T - - Y = g(X) - - # Create observations by adding noise to a realization. - observation_noise = rng.standard_normal(size=num_observations) - observations = Y[:, 0] + observation_noise - - # Create necessary files and data sets to be able to update - # the parameters using the ensemble smoother. 
- shape = Shape(num_grid_cells, num_grid_cells, 1) - grid = xtgeo.create_box_grid(dimension=(shape.nx, shape.ny, shape.nz)) - grid.to_file("MY_EGRID.EGRID", "egrid") - - resp = GenDataConfig(keys=["RESPONSE"]) - obs = xr.Dataset( - { - "observations": ( - ["report_step", "index"], - observations.reshape((1, num_observations)), - ), - "std": ( - ["report_step", "index"], - observation_noise.reshape(1, num_observations), - ), - }, - coords={"report_step": [0], "index": np.arange(len(observations))}, - attrs={"response": "RESPONSE"}, - ) - - param_group = "PARAM_FIELD" - - config = Field.from_config_list( - "MY_EGRID.EGRID", - shape, - [ - param_group, - param_group, - "param.GRDECL", - "INIT_FILES:param_%d.GRDECL", - "FORWARD_INIT:False", - ], - ) - - experiment = storage.create_experiment( - parameters=[config], - responses=[resp], - observations={"OBSERVATION": obs}, - ) - - prior_ensemble = storage.create_ensemble( - experiment, - ensemble_size=num_ensemble, - iteration=0, - name="prior", - ) - - for iens in range(prior_ensemble.ensemble_size): - prior_ensemble.save_parameters( - param_group, - iens, - xr.Dataset( - { - "values": xr.DataArray( - X[:, iens].reshape(num_grid_cells, num_grid_cells, 1), - dims=("x", "y", "z"), - ), - } - ), - ) - - prior_ensemble.save_response( - "gen_data", - xr.Dataset( - {"values": (["name", "report_step", "index"], [[Y[:, iens]]])}, - coords={ - "name": ["RESPONSE"], - "index": range(len(Y[:, iens])), - "report_step": [0], - }, - ), - iens, - ) - - posterior_ensemble = storage.create_ensemble( - prior_ensemble.experiment_id, - ensemble_size=prior_ensemble.ensemble_size, - iteration=1, - name="posterior", - prior_ensemble=prior_ensemble, - ) - - smoother_update_run = partial( - smoother_update, - prior_ensemble, - posterior_ensemble, - ["OBSERVATION"], - [param_group], - UpdateSettings(), - ESSettings(localization=True), - ) - benchmark(smoother_update_run) - - prior_da = prior_ensemble.load_parameters(param_group, range(num_ensemble))[ - "values" - ] - posterior_da = posterior_ensemble.load_parameters(param_group, range(num_ensemble))[ - "values" - ] - # Make sure some, but not all parameters were updated. - assert not np.allclose(prior_da, posterior_da) - # All parameters would be updated with a global update so this would fail. 
- assert np.isclose(prior_da, posterior_da).sum() > 0 - # The std for the ensemble should decrease - assert float( - prior_ensemble.calculate_std_dev_for_parameter(param_group)["values"].sum() - ) > float( - posterior_ensemble.calculate_std_dev_for_parameter(param_group)["values"].sum() - ) - - def test_update_only_using_subset_observations( snake_oil_case_storage, snake_oil_storage, snapshot ): diff --git a/tests/unit_tests/config/parsing/test_observations_parser.py b/tests/unit_tests/config/parsing/test_observations_parser.py index 48282565bd4..19d83bd4df6 100644 --- a/tests/unit_tests/config/parsing/test_observations_parser.py +++ b/tests/unit_tests/config/parsing/test_observations_parser.py @@ -20,6 +20,7 @@ observation_contents = stlark.from_lark(observations_parser) +@pytest.mark.integration_test @given(observation_contents) def test_parsing_contents_succeeds_or_gives_config_error(contents): with suppress(ObservationConfigError): diff --git a/tests/unit_tests/config/test_ert_config.py b/tests/unit_tests/config/test_ert_config.py index 7cc2e9c0349..8ef95f8b435 100644 --- a/tests/unit_tests/config/test_ert_config.py +++ b/tests/unit_tests/config/test_ert_config.py @@ -506,6 +506,7 @@ def test_that_subst_list_is_given_default_runpath_file(): ) +@pytest.mark.integration_test @pytest.mark.filterwarnings("ignore::ert.config.ConfigWarning") @pytest.mark.usefixtures("set_site_config") @settings(max_examples=10) @@ -520,6 +521,7 @@ def test_that_creating_ert_config_from_dict_is_same_as_from_file( ) == ErtConfig.from_file(filename) +@pytest.mark.integration_test @pytest.mark.filterwarnings("ignore::ert.config.ConfigWarning") @pytest.mark.usefixtures("set_site_config") @settings(max_examples=10) @@ -614,6 +616,7 @@ def test_queue_config_max_running_invalid_values(max_running_value, expected_err ErtConfig.from_file(test_config_file_name) +@pytest.mark.integration_test @pytest.mark.filterwarnings("ignore::ert.config.ConfigWarning") @pytest.mark.usefixtures("use_tmpdir") @given(st.integers(min_value=0), st.integers(min_value=0), st.integers(min_value=0)) @@ -922,6 +925,7 @@ def test_that_unknown_hooked_job_gives_config_validation_error(): _ = ErtConfig.from_file(test_config_file_name) +@pytest.mark.integration_test @pytest.mark.usefixtures("set_site_config") @settings(max_examples=10) @given(config_generators()) diff --git a/tests/unit_tests/config/test_observations.py b/tests/unit_tests/config/test_observations.py index bf0c6e5e947..9d3dfb35e66 100644 --- a/tests/unit_tests/config/test_observations.py +++ b/tests/unit_tests/config/test_observations.py @@ -188,6 +188,7 @@ def test_gen_obs_invalid_observation_std(std): ) +@pytest.mark.integration_test @settings(max_examples=10) @pytest.mark.filterwarnings("ignore::UserWarning") @pytest.mark.filterwarnings("ignore::RuntimeWarning") diff --git a/tests/unit_tests/config/test_parser_error_collection.py b/tests/unit_tests/config/test_parser_error_collection.py index 02d678bc8b7..2c1ced4ec40 100644 --- a/tests/unit_tests/config/test_parser_error_collection.py +++ b/tests/unit_tests/config/test_parser_error_collection.py @@ -318,6 +318,7 @@ def test_that_multiple_keyword_specific_tokens_are_located(contents, expected_er ) +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") @given( strategies.lists( @@ -762,6 +763,7 @@ def test_that_unicode_decode_error_is_localized_random_line_single_insert(): assert collected_errors[0].end_column == -1 +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") @given( lines=strategies.lists( 
@@ -1066,6 +1068,7 @@ def test_that_executable_directory_errors(dirname): ], ) @pytest.mark.usefixtures("use_tmpdir") +@pytest.mark.integration_test def test_that_deprecations_are_handled(contents, expected_errors): for expected_error in expected_errors: assert_that_config_leads_to_warning( diff --git a/tests/unit_tests/config/test_read_summary.py b/tests/unit_tests/config/test_read_summary.py index 77c488c36b2..efc5b959569 100644 --- a/tests/unit_tests/config/test_read_summary.py +++ b/tests/unit_tests/config/test_read_summary.py @@ -242,6 +242,7 @@ def test_local_well_summary_format_have_cell_index_and_name(keyword, name, lgr_n ) +@pytest.mark.integration_test @given(summaries(), st.sampled_from(resfo.Format)) def test_that_reading_summaries_returns_the_contents_of_the_file( tmp_path_factory, summary, format @@ -483,20 +484,3 @@ def test_that_ambiguous_case_restart_raises_an_informative_error( match="Ambiguous reference to unified summary", ): read_summary(str(tmp_path / "test"), ["*"]) - - -@given(summaries()) -def test_that_length_of_fetch_keys_does_not_reduce_performance( - tmp_path_factory, summary -): - """With a compiled regex this takes seconds to run, and with - a naive implementation it will take almost an hour. - """ - tmp_path = tmp_path_factory.mktemp("summary") - smspec, unsmry = summary - unsmry.to_file(tmp_path / "TEST.UNSMRY") - smspec.to_file(tmp_path / "TEST.SMSPEC") - fetch_keys = [str(i) for i in range(100000)] - (_, keys, time_map, _) = read_summary(str(tmp_path / "TEST"), fetch_keys) - assert all(k in fetch_keys for k in keys) - assert len(time_map) == len(unsmry.steps) diff --git a/tests/unit_tests/config/test_substitution_list.py b/tests/unit_tests/config/test_substitution_list.py index 1002ee91867..a4d4ce54a0b 100644 --- a/tests/unit_tests/config/test_substitution_list.py +++ b/tests/unit_tests/config/test_substitution_list.py @@ -1,5 +1,6 @@ import os +import pytest from hypothesis import assume, given, settings from ert.config import ErtConfig @@ -8,6 +9,7 @@ from .config_dict_generator import config_generators +@pytest.mark.integration_test @settings(max_examples=10) @given(config_generators(), config_generators()) def test_different_defines_give_different_subst_lists( diff --git a/tests/unit_tests/config/test_summary_config.py b/tests/unit_tests/config/test_summary_config.py index 88e7e4a4fe8..992f635d9a9 100644 --- a/tests/unit_tests/config/test_summary_config.py +++ b/tests/unit_tests/config/test_summary_config.py @@ -1,6 +1,6 @@ import hypothesis.strategies as st import pytest -from hypothesis import given +from hypothesis import given, settings from ert.config import ConfigValidationError, ErtConfig, SummaryConfig @@ -17,9 +17,10 @@ def test_bad_user_config_file_error_message(tmp_path): _ = ErtConfig.from_file(str(tmp_path / "test.ert")) +@settings(max_examples=10) @given(summaries(summary_keys=st.just(["WOPR:OP1"]))) @pytest.mark.usefixtures("use_tmpdir") -def test_rading_empty_summaries_raises(wopr_summary): +def test_reading_empty_summaries_raises(wopr_summary): smspec, unsmry = wopr_summary smspec.to_file("CASE.SMSPEC") unsmry.to_file("CASE.UNSMRY") diff --git a/tests/unit_tests/dark_storage/test_http_endpoints.py b/tests/unit_tests/dark_storage/test_http_endpoints.py index d1d52ad4eae..fc5c9d9156e 100644 --- a/tests/unit_tests/dark_storage/test_http_endpoints.py +++ b/tests/unit_tests/dark_storage/test_http_endpoints.py @@ -7,6 +7,7 @@ from requests import Response +@pytest.mark.integration_test def test_get_experiment(poly_example_tmp_dir, 
dark_storage_client): resp: Response = dark_storage_client.get("/experiments") answer_json = resp.json() @@ -16,6 +17,7 @@ def test_get_experiment(poly_example_tmp_dir, dark_storage_client): assert "name" in answer_json[0] +@pytest.mark.integration_test def test_get_ensemble(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") experiment_json = resp.json() @@ -32,6 +34,7 @@ def test_get_ensemble(poly_example_tmp_dir, dark_storage_client): assert ensemble_json["userdata"]["experiment_name"] == experiment_json[0]["name"] +@pytest.mark.integration_test def test_get_experiment_ensemble(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") experiment_json = resp.json() @@ -48,6 +51,7 @@ def test_get_experiment_ensemble(poly_example_tmp_dir, dark_storage_client): assert ensembles_json[0]["userdata"]["name"] in ("iter-0", "iter-1") +@pytest.mark.integration_test def test_get_responses_with_observations(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") experiment_json = resp.json() @@ -61,6 +65,7 @@ def test_get_responses_with_observations(poly_example_tmp_dir, dark_storage_clie assert ensemble_json["POLY_RES@0"]["has_observations"] is True +@pytest.mark.integration_test def test_get_response(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") experiment_json = resp.json() @@ -110,6 +115,7 @@ def test_get_response(poly_example_tmp_dir, dark_storage_client): assert len(record_df1.index) == 3 +@pytest.mark.integration_test def test_get_ensemble_parameters(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") answer_json = resp.json() @@ -139,11 +145,13 @@ def test_get_ensemble_parameters(poly_example_tmp_dir, dark_storage_client): } +@pytest.mark.integration_test def test_refresh_facade(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.post("/updates/facade") assert resp.status_code == 200 +@pytest.mark.integration_test def test_get_experiment_observations(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") experiment_json = resp.json() @@ -161,6 +169,7 @@ def test_get_experiment_observations(poly_example_tmp_dir, dark_storage_client): assert len(response_json[0]["x_axis"]) == 5 +@pytest.mark.integration_test def test_get_record_observations(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") answer_json = resp.json() @@ -178,6 +187,7 @@ def test_get_record_observations(poly_example_tmp_dir, dark_storage_client): assert len(response_json[0]["x_axis"]) == 5 +@pytest.mark.integration_test def test_misfit_endpoint(poly_example_tmp_dir, dark_storage_client): resp: Response = dark_storage_client.get("/experiments") experiment_json = resp.json() @@ -193,6 +203,7 @@ def test_misfit_endpoint(poly_example_tmp_dir, dark_storage_client): assert misfit.shape == (3, 5) +@pytest.mark.integration_test @pytest.mark.parametrize( "coeffs", [ diff --git a/tests/unit_tests/ensemble_evaluator/test_ensemble_evaluator.py b/tests/unit_tests/ensemble_evaluator/test_ensemble_evaluator.py index 51da8184d12..239d26a32f8 100644 --- a/tests/unit_tests/ensemble_evaluator/test_ensemble_evaluator.py +++ b/tests/unit_tests/ensemble_evaluator/test_ensemble_evaluator.py @@ -95,6 +95,7 @@ async def evaluator_to_use_fixture(make_ee_config): await run_task 
+@pytest.mark.integration_test @pytest.mark.timeout(20) async def test_restarted_jobs_do_not_have_error_msgs(evaluator_to_use): evaluator = evaluator_to_use @@ -187,6 +188,7 @@ def check_if_final_snapshot_is_complete(snapshot: EnsembleSnapshot) -> bool: break +@pytest.mark.integration_test @pytest.mark.timeout(20) async def test_new_monitor_can_pick_up_where_we_left_off(evaluator_to_use): evaluator = evaluator_to_use @@ -423,6 +425,7 @@ async def test_dispatch_endpoint_clients_can_connect_and_monitor_can_shut_down_e raise AssertionError(f"got unexpected event {event} from monitor2") +@pytest.mark.integration_test async def test_ensure_multi_level_events_in_order(evaluator_to_use): evaluator = evaluator_to_use diff --git a/tests/unit_tests/ensemble_evaluator/test_ensemble_legacy.py b/tests/unit_tests/ensemble_evaluator/test_ensemble_legacy.py index 02ea3924b15..fbca5a2a265 100644 --- a/tests/unit_tests/ensemble_evaluator/test_ensemble_legacy.py +++ b/tests/unit_tests/ensemble_evaluator/test_ensemble_legacy.py @@ -30,6 +30,7 @@ async def run_evaluator(ensemble, ee_config): return run_evaluator +@pytest.mark.integration_test @pytest.mark.timeout(60) @pytest.mark.asyncio async def test_run_legacy_ensemble( @@ -65,6 +66,7 @@ async def test_run_legacy_ensemble( assert os.path.isfile(f"real_{i}/status.txt") +@pytest.mark.integration_test @pytest.mark.timeout(60) async def test_run_and_cancel_legacy_ensemble( tmpdir, make_ensemble, monkeypatch, evaluator_to_use diff --git a/tests/unit_tests/ensemble_evaluator/test_scheduler.py b/tests/unit_tests/ensemble_evaluator/test_scheduler.py index c5004a74a6c..8fbfb0dd47f 100644 --- a/tests/unit_tests/ensemble_evaluator/test_scheduler.py +++ b/tests/unit_tests/ensemble_evaluator/test_scheduler.py @@ -14,6 +14,7 @@ from ert.ensemble_evaluator.config import EvaluatorServerConfig +@pytest.mark.integration_test @pytest.mark.timeout(60) async def test_scheduler_receives_checksum_and_waits_for_disk_sync( tmpdir, make_ensemble, monkeypatch, caplog diff --git a/tests/unit_tests/forward_model_runner/test_event_reporter.py b/tests/unit_tests/forward_model_runner/test_event_reporter.py index cbacd6bcf3e..783952dc905 100644 --- a/tests/unit_tests/forward_model_runner/test_event_reporter.py +++ b/tests/unit_tests/forward_model_runner/test_event_reporter.py @@ -177,6 +177,7 @@ def test_report_inconsistent_events(unused_tcp_port): reporter.report(Finish()) +@pytest.mark.integration_test def test_report_with_failed_reporter_but_finished_jobs(unused_tcp_port): # this is to show when the reporter fails ert won't crash nor # staying hanging but instead finishes up the job; @@ -259,6 +260,7 @@ def send_func(msg): assert len(lines) == 3, "expected 3 Job running messages" +@pytest.mark.integration_test def test_report_with_closed_received_exiting_gracefully(unused_tcp_port): # Whenever the receiver end closes the connection, a ConnectionClosedOK is raised # The reporter should exit the publisher thread gracefully and not send any diff --git a/tests/unit_tests/forward_model_runner/test_job.py b/tests/unit_tests/forward_model_runner/test_job.py index 3a36560d427..84f76007eff 100644 --- a/tests/unit_tests/forward_model_runner/test_job.py +++ b/tests/unit_tests/forward_model_runner/test_job.py @@ -40,6 +40,7 @@ def test_run_with_process_failing( next(run) +@pytest.mark.integration_test @pytest.mark.flaky(reruns=5) @pytest.mark.usefixtures("use_tmpdir") def test_memory_usage_counts_grandchildren(): diff --git a/tests/unit_tests/gui/model/test_snapshot.py 
b/tests/unit_tests/gui/model/test_snapshot.py index 17d2550668a..0ea62b4ab20 100644 --- a/tests/unit_tests/gui/model/test_snapshot.py +++ b/tests/unit_tests/gui/model/test_snapshot.py @@ -1,3 +1,4 @@ +import pytest from pytestqt.qt_compat import qt_api from qtpy.QtCore import QModelIndex from qtpy.QtGui import QColor @@ -8,6 +9,7 @@ from .gui_models_utils import finish_snapshot +@pytest.mark.integration_test def test_using_qt_model_tester(qtmodeltester, full_snapshot): model = SnapshotModel() diff --git a/tests/unit_tests/gui/plottery/test_histogram.py b/tests/unit_tests/gui/plottery/test_histogram.py index 989d11cbb1c..4a62717b070 100644 --- a/tests/unit_tests/gui/plottery/test_histogram.py +++ b/tests/unit_tests/gui/plottery/test_histogram.py @@ -59,6 +59,7 @@ def ensemble_to_data_map(request, plot_context): return dict.fromkeys(plot_context.ensembles(), request.param) +@pytest.mark.integration_test @pytest.mark.mpl_image_compare(tolerance=10) def test_histogram(plot_context: PlotContext, ensemble_to_data_map): figure = Figure() diff --git a/tests/unit_tests/gui/simulation/test_run_dialog.py b/tests/unit_tests/gui/simulation/test_run_dialog.py index 1dea3a17fba..6416246ed07 100644 --- a/tests/unit_tests/gui/simulation/test_run_dialog.py +++ b/tests/unit_tests/gui/simulation/test_run_dialog.py @@ -94,6 +94,7 @@ def handle_dialog(): qtbot.mouseClick(run_dialog.kill_button, Qt.LeftButton) +@pytest.mark.integration_test def test_run_dialog_polls_run_model_for_runtime( qtbot: QtBot, run_dialog: RunDialog, run_model, notifier, event_queue ): @@ -359,69 +360,6 @@ def test_run_dialog(events, tab_widget_count, qtbot: QtBot, run_dialog, event_qu qtbot.waitUntil(lambda: not run_dialog.done_button.isHidden(), timeout=5000) -def test_that_run_dialog_can_be_closed_while_file_plot_is_open( - snake_oil_case_storage: ErtConfig, qtbot: QtBot -): - """ - This is a regression test for a crash happening when - closing the RunDialog with a file open. 
- """ - - snake_oil_case = snake_oil_case_storage - args_mock = Mock() - args_mock.config = "snake_oil.ert" - - with StorageService.init_service( - project=os.path.abspath(snake_oil_case.ens_path), - ), open_storage(snake_oil_case.ens_path, mode="w") as storage: - gui = _setup_main_window(snake_oil_case, args_mock, GUILogHandler(), storage) - experiment_panel = gui.findChild(ExperimentPanel) - - run_experiment = experiment_panel.findChild(QWidget, name="run_experiment") - assert run_experiment - assert isinstance(run_experiment, QToolButton) - - QTimer.singleShot( - 1000, lambda: handle_run_path_dialog(gui, qtbot, delete_run_path=True) - ) - qtbot.mouseClick(run_experiment, Qt.LeftButton) - - qtbot.waitUntil(lambda: gui.findChild(RunDialog) is not None, timeout=5000) - run_dialog = gui.findChild(RunDialog) - qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=100000) - fm_step_overview = run_dialog._fm_step_overview - - qtbot.waitUntil(fm_step_overview.isVisible, timeout=20000) - qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=200000) - - realization_widget = run_dialog.findChild(RealizationWidget) - - click_pos = realization_widget._real_view.rectForIndex( - realization_widget._real_list_model.index(0, 0) - ).center() - - with qtbot.waitSignal(realization_widget.itemClicked, timeout=30000): - qtbot.mouseClick( - realization_widget._real_view.viewport(), - Qt.LeftButton, - pos=click_pos, - ) - - click_pos = fm_step_overview.visualRect( - fm_step_overview.model().index(0, 4) - ).center() - qtbot.mouseClick(fm_step_overview.viewport(), Qt.LeftButton, pos=click_pos) - - qtbot.waitUntil(run_dialog.findChild(FileDialog).isVisible, timeout=30000) - - with qtbot.waitSignal(run_dialog.accepted, timeout=30000): - run_dialog.close() # Close the run dialog by pressing 'x' close button - - # Ensure that once the run dialog is closed - # another simulation can be started - assert run_experiment.isEnabled() - - @pytest.mark.parametrize( "events,tab_widget_count", [ @@ -521,6 +459,7 @@ def test_run_dialog_memory_usage_showing( assert max_memory_value == "60.00 KB" +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") def test_that_exception_in_base_run_model_is_handled(qtbot: QtBot, storage): config_file = "minimal_config.ert" @@ -556,6 +495,7 @@ def handle_error_dialog(run_dialog): qtbot.waitUntil(run_dialog.done_button.isVisible, timeout=200000) +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") def test_that_debug_info_button_provides_data_in_clipboard(qtbot: QtBot, storage): config_file = "minimal_config.ert" @@ -592,6 +532,7 @@ def test_that_debug_info_button_provides_data_in_clipboard(qtbot: QtBot, storage assert keyword in clipboard_text +@pytest.mark.integration_test def test_that_stdout_and_stderr_buttons_react_to_file_content( snake_oil_case_storage: ErtConfig, qtbot: QtBot ): diff --git a/tests/unit_tests/gui/simulation/test_run_path_dialog.py b/tests/unit_tests/gui/simulation/test_run_path_dialog.py index feaffd52e15..95670548dca 100644 --- a/tests/unit_tests/gui/simulation/test_run_path_dialog.py +++ b/tests/unit_tests/gui/simulation/test_run_path_dialog.py @@ -2,6 +2,7 @@ from pathlib import Path from unittest.mock import Mock, patch +import pytest from pytestqt.qtbot import QtBot from qtpy.QtCore import Qt, QTimer from qtpy.QtWidgets import QComboBox, QMessageBox, QToolButton, QWidget @@ -48,6 +49,7 @@ def handle_run_path_error_dialog(gui: ErtMainWindow, qtbot: QtBot): qtbot.mouseClick(mb.buttons()[0], Qt.LeftButton) 
+@pytest.mark.integration_test def test_run_path_deleted_error( snake_oil_case_storage: ErtConfig, qtbot: QtBot, mocker ): @@ -97,6 +99,7 @@ def test_run_path_deleted_error( assert os.path.exists(run_path / dummy_file.name) +@pytest.mark.integration_test def test_run_path_is_deleted(snake_oil_case_storage: ErtConfig, qtbot: QtBot): snake_oil_case = snake_oil_case_storage args_mock = Mock() @@ -142,6 +145,7 @@ def test_run_path_is_deleted(snake_oil_case_storage: ErtConfig, qtbot: QtBot): assert not os.path.exists(run_path / dummy_file.name) +@pytest.mark.integration_test def test_run_path_is_not_deleted(snake_oil_case_storage: ErtConfig, qtbot: QtBot): snake_oil_case = snake_oil_case_storage args_mock = Mock() diff --git a/tests/integration_tests/__init__.py b/tests/unit_tests/resources/__init__.py similarity index 100% rename from tests/integration_tests/__init__.py rename to tests/unit_tests/resources/__init__.py diff --git a/tests/integration_tests/share/_import_from_location.py b/tests/unit_tests/resources/_import_from_location.py similarity index 100% rename from tests/integration_tests/share/_import_from_location.py rename to tests/unit_tests/resources/_import_from_location.py diff --git a/tests/integration_tests/share/ecl_run_fail b/tests/unit_tests/resources/ecl_run_fail similarity index 100% rename from tests/integration_tests/share/ecl_run_fail rename to tests/unit_tests/resources/ecl_run_fail diff --git a/tests/integration_tests/share/test_ecl_run_new_config.py b/tests/unit_tests/resources/test_ecl_run_new_config.py similarity index 95% rename from tests/integration_tests/share/test_ecl_run_new_config.py rename to tests/unit_tests/resources/test_ecl_run_new_config.py index 7d9a209048b..4349dc0b571 100644 --- a/tests/integration_tests/share/test_ecl_run_new_config.py +++ b/tests/unit_tests/resources/test_ecl_run_new_config.py @@ -15,7 +15,7 @@ from ._import_from_location import import_from_location -# import ecl_config.py and ecl_run from ert/forward-models/res/script +# import ecl_config.py and ecl_run from ert/resources/forward-models/res/script # package-data path which. These are kept out of the ert package to avoid the # overhead of importing ert. This is necessary as these may be invoked as a # subprocess on each realization. 
@@ -104,6 +104,7 @@ def test_env(eclrun_conf): assert v == run_env[k] +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") @pytest.mark.requires_eclipse def test_run(source_root): @@ -127,6 +128,7 @@ def test_run(source_root): assert len(errors) == 0 +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") @pytest.mark.requires_eclipse def test_run_new_log_file(source_root): @@ -150,6 +152,7 @@ def test_run_new_log_file(source_root): assert len(errors) == 0 +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_run_api(source_root): @@ -163,6 +166,7 @@ def test_run_api(source_root): assert os.path.isfile("SPE1.DATA") +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_failed_run(source_root): @@ -177,6 +181,7 @@ def test_failed_run(source_root): erun.runEclipse(eclrun_config=eclrun_config) +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_failed_run_nonzero_returncode(monkeypatch): @@ -193,6 +198,7 @@ def test_failed_run_nonzero_returncode(monkeypatch): erun.runEclipse(eclrun_config=eclrun_config) +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_failed_run_OK(source_root): @@ -204,6 +210,7 @@ def test_failed_run_OK(source_root): ecl_run.run(econfig, ["SPE1_ERROR", "--version=2019.3", "--ignore-errors"]) +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_no_hdf5_output_by_default_with_ecl100(source_root): @@ -217,6 +224,7 @@ def test_no_hdf5_output_by_default_with_ecl100(source_root): assert not os.path.exists("SPE1.h5") +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_flag_to_produce_hdf5_output_with_ecl100(source_root): @@ -230,6 +238,7 @@ def test_flag_to_produce_hdf5_output_with_ecl100(source_root): assert os.path.exists("SPE1.h5") +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_mpi_run(source_root): @@ -243,6 +252,7 @@ def test_mpi_run(source_root): assert os.path.getsize("SPE1_PARALLEL.OUT") > 0 +@pytest.mark.integration_test @pytest.mark.requires_eclipse @pytest.mark.usefixtures("use_tmpdir", "init_eclrun_config") def test_summary_block(source_root): diff --git a/tests/integration_tests/share/test_ecl_versioning_config.py b/tests/unit_tests/resources/test_ecl_versioning_config.py similarity index 100% rename from tests/integration_tests/share/test_ecl_versioning_config.py rename to tests/unit_tests/resources/test_ecl_versioning_config.py diff --git a/tests/integration_tests/share/test_forward_models.py b/tests/unit_tests/resources/test_forward_models.py similarity index 100% rename from tests/integration_tests/share/test_forward_models.py rename to tests/unit_tests/resources/test_forward_models.py diff --git a/tests/integration_tests/share/test_opm_flow.py b/tests/unit_tests/resources/test_opm_flow.py similarity index 99% rename from tests/integration_tests/share/test_opm_flow.py rename to tests/unit_tests/resources/test_opm_flow.py index a2be25cadd0..ddf79bf97a0 100644 --- a/tests/integration_tests/share/test_opm_flow.py +++ 
b/tests/unit_tests/resources/test_opm_flow.py @@ -77,6 +77,7 @@ def test_ecl_run_make_LSB_MCPU_machine_list(): ] +@pytest.mark.integration_test @flow_installed def test_flow(init_flow_config, source_root): shutil.copy(source_root / "test-data/eclipse/SPE1.DATA", "SPE1.DATA") @@ -99,6 +100,7 @@ def test_flow(init_flow_config, source_root): ecl_run.run(flow_config, ["SPE1.DATA", "--version=no/such/version"]) +@pytest.mark.integration_test @flow_installed def test_flow_with_mpi(init_flow_config, source_root): """This only tests that ERT will be able to start flow on a data deck with diff --git a/tests/integration_tests/share/test_shell.py b/tests/unit_tests/resources/test_shell.py similarity index 100% rename from tests/integration_tests/share/test_shell.py rename to tests/unit_tests/resources/test_shell.py diff --git a/tests/integration_tests/share/test_subprocess.py b/tests/unit_tests/resources/test_subprocess.py similarity index 97% rename from tests/integration_tests/share/test_subprocess.py rename to tests/unit_tests/resources/test_subprocess.py index 40558acc080..69566f36ed1 100644 --- a/tests/integration_tests/share/test_subprocess.py +++ b/tests/unit_tests/resources/test_subprocess.py @@ -4,10 +4,9 @@ import pytest +from tests import import_from_location from tests.utils import SOURCE_DIR -from ._import_from_location import import_from_location - # import ecl_config and ecl_run.py from ert/forward-models/res/script # package-data path which. These are kept out of the ert package to avoid the # overhead of importing ert. This is necessary as these may be invoked as a diff --git a/tests/integration_tests/share/test_templating.py b/tests/unit_tests/resources/test_templating.py similarity index 100% rename from tests/integration_tests/share/test_templating.py rename to tests/unit_tests/resources/test_templating.py diff --git a/tests/integration_tests/scheduler/__init__.py b/tests/unit_tests/scheduler/__init__.py similarity index 100% rename from tests/integration_tests/scheduler/__init__.py rename to tests/unit_tests/scheduler/__init__.py diff --git a/tests/integration_tests/scheduler/bin/bhist b/tests/unit_tests/scheduler/bin/bhist similarity index 100% rename from tests/integration_tests/scheduler/bin/bhist rename to tests/unit_tests/scheduler/bin/bhist diff --git a/tests/integration_tests/scheduler/bin/bhist.py b/tests/unit_tests/scheduler/bin/bhist.py similarity index 100% rename from tests/integration_tests/scheduler/bin/bhist.py rename to tests/unit_tests/scheduler/bin/bhist.py diff --git a/tests/integration_tests/scheduler/bin/bjobs b/tests/unit_tests/scheduler/bin/bjobs similarity index 100% rename from tests/integration_tests/scheduler/bin/bjobs rename to tests/unit_tests/scheduler/bin/bjobs diff --git a/tests/integration_tests/scheduler/bin/bjobs.py b/tests/unit_tests/scheduler/bin/bjobs.py similarity index 100% rename from tests/integration_tests/scheduler/bin/bjobs.py rename to tests/unit_tests/scheduler/bin/bjobs.py diff --git a/tests/integration_tests/scheduler/bin/bkill b/tests/unit_tests/scheduler/bin/bkill similarity index 100% rename from tests/integration_tests/scheduler/bin/bkill rename to tests/unit_tests/scheduler/bin/bkill diff --git a/tests/integration_tests/scheduler/bin/bkill.py b/tests/unit_tests/scheduler/bin/bkill.py similarity index 100% rename from tests/integration_tests/scheduler/bin/bkill.py rename to tests/unit_tests/scheduler/bin/bkill.py diff --git a/tests/integration_tests/scheduler/bin/bsub b/tests/unit_tests/scheduler/bin/bsub similarity index 100% 
rename from tests/integration_tests/scheduler/bin/bsub rename to tests/unit_tests/scheduler/bin/bsub diff --git a/tests/integration_tests/scheduler/bin/lsfrunner b/tests/unit_tests/scheduler/bin/lsfrunner similarity index 100% rename from tests/integration_tests/scheduler/bin/lsfrunner rename to tests/unit_tests/scheduler/bin/lsfrunner diff --git a/tests/integration_tests/scheduler/bin/qdel b/tests/unit_tests/scheduler/bin/qdel similarity index 100% rename from tests/integration_tests/scheduler/bin/qdel rename to tests/unit_tests/scheduler/bin/qdel diff --git a/tests/integration_tests/scheduler/bin/qstat b/tests/unit_tests/scheduler/bin/qstat similarity index 100% rename from tests/integration_tests/scheduler/bin/qstat rename to tests/unit_tests/scheduler/bin/qstat diff --git a/tests/integration_tests/scheduler/bin/qstat.py b/tests/unit_tests/scheduler/bin/qstat.py similarity index 100% rename from tests/integration_tests/scheduler/bin/qstat.py rename to tests/unit_tests/scheduler/bin/qstat.py diff --git a/tests/integration_tests/scheduler/bin/qsub b/tests/unit_tests/scheduler/bin/qsub similarity index 100% rename from tests/integration_tests/scheduler/bin/qsub rename to tests/unit_tests/scheduler/bin/qsub diff --git a/tests/integration_tests/scheduler/bin/runner b/tests/unit_tests/scheduler/bin/runner similarity index 100% rename from tests/integration_tests/scheduler/bin/runner rename to tests/unit_tests/scheduler/bin/runner diff --git a/tests/integration_tests/scheduler/bin/sbatch b/tests/unit_tests/scheduler/bin/sbatch similarity index 100% rename from tests/integration_tests/scheduler/bin/sbatch rename to tests/unit_tests/scheduler/bin/sbatch diff --git a/tests/integration_tests/scheduler/bin/sbatch.py b/tests/unit_tests/scheduler/bin/sbatch.py similarity index 100% rename from tests/integration_tests/scheduler/bin/sbatch.py rename to tests/unit_tests/scheduler/bin/sbatch.py diff --git a/tests/integration_tests/scheduler/bin/scancel b/tests/unit_tests/scheduler/bin/scancel similarity index 100% rename from tests/integration_tests/scheduler/bin/scancel rename to tests/unit_tests/scheduler/bin/scancel diff --git a/tests/integration_tests/scheduler/bin/scontrol b/tests/unit_tests/scheduler/bin/scontrol similarity index 100% rename from tests/integration_tests/scheduler/bin/scontrol rename to tests/unit_tests/scheduler/bin/scontrol diff --git a/tests/integration_tests/scheduler/bin/scontrol.py b/tests/unit_tests/scheduler/bin/scontrol.py similarity index 100% rename from tests/integration_tests/scheduler/bin/scontrol.py rename to tests/unit_tests/scheduler/bin/scontrol.py diff --git a/tests/integration_tests/scheduler/bin/squeue b/tests/unit_tests/scheduler/bin/squeue similarity index 100% rename from tests/integration_tests/scheduler/bin/squeue rename to tests/unit_tests/scheduler/bin/squeue diff --git a/tests/integration_tests/scheduler/bin/squeue.py b/tests/unit_tests/scheduler/bin/squeue.py similarity index 100% rename from tests/integration_tests/scheduler/bin/squeue.py rename to tests/unit_tests/scheduler/bin/squeue.py diff --git a/tests/unit_tests/scheduler/conftest.py b/tests/unit_tests/scheduler/conftest.py index 2179424b470..dc8b7d41a5e 100644 --- a/tests/unit_tests/scheduler/conftest.py +++ b/tests/unit_tests/scheduler/conftest.py @@ -1,4 +1,9 @@ +from __future__ import annotations + import asyncio +import os +import sys +from pathlib import Path from typing import Any, Coroutine, Literal import pytest @@ -68,3 +73,16 @@ def done() -> bool: @pytest.fixture def mock_event(): 
return MockEvent + + +def mock_bin(monkeypatch, tmp_path): + bin_path = Path(__file__).parent / "bin" + + monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") + monkeypatch.setenv("PYTEST_TMP_PATH", str(tmp_path)) + monkeypatch.setenv("PYTHON", sys.executable) + + +@pytest.fixture +def job_name(request) -> str: + return request.node.name.split("[")[0] diff --git a/tests/integration_tests/scheduler/test_generic_driver.py b/tests/unit_tests/scheduler/test_generic_driver.py similarity index 98% rename from tests/integration_tests/scheduler/test_generic_driver.py rename to tests/unit_tests/scheduler/test_generic_driver.py index b22526846e0..cdcce086207 100644 --- a/tests/integration_tests/scheduler/test_generic_driver.py +++ b/tests/unit_tests/scheduler/test_generic_driver.py @@ -61,6 +61,7 @@ async def test_submit(driver: Driver, tmp_path, job_name): assert (tmp_path / "test").read_text(encoding="utf-8") == "test\n" +@pytest.mark.integration_test async def test_submit_something_that_fails(driver: Driver, tmp_path, job_name): os.chdir(tmp_path) finished_called = False @@ -87,6 +88,7 @@ async def finished(iens, returncode): assert finished_called +@pytest.mark.integration_test async def test_kill_gives_correct_state(driver: Driver, tmp_path, request): os.chdir(tmp_path) aborted_called = False @@ -121,6 +123,7 @@ async def finished(iens, returncode): assert aborted_called +@pytest.mark.integration_test @pytest.mark.flaky(reruns=10) async def test_repeated_submit_same_iens(driver: Driver, tmp_path): """Submits are allowed to be repeated for the same iens, and are to be @@ -147,6 +150,7 @@ async def test_repeated_submit_same_iens(driver: Driver, tmp_path): assert Path("submissionrace").read_text(encoding="utf-8") == "submit2\n" +@pytest.mark.integration_test @pytest.mark.flaky(reruns=5) async def test_kill_actually_kills(driver: Driver, tmp_path, pytestconfig): os.chdir(tmp_path) @@ -215,7 +219,6 @@ async def test_num_cpu_sets_env_variables(driver: Driver, tmp_path, job_name): assert "NCPUS=2" in env_lines -@pytest.mark.integration_test async def test_execute_with_retry_exits_on_filenotfounderror(driver: Driver, caplog): caplog.set_level(logging.DEBUG) invalid_cmd = ["/usr/bin/foo", "bar"] diff --git a/tests/unit_tests/scheduler/test_lsf_driver.py b/tests/unit_tests/scheduler/test_lsf_driver.py index 4d8c7d3e14a..1622b085b8c 100644 --- a/tests/unit_tests/scheduler/test_lsf_driver.py +++ b/tests/unit_tests/scheduler/test_lsf_driver.py @@ -1,7 +1,12 @@ import asyncio +import json import logging import os +import random +import re import stat +import string +import sys import time from contextlib import ExitStack as does_not_raise from pathlib import Path @@ -12,10 +17,10 @@ import pytest from hypothesis import given from hypothesis import strategies as st -from tests.utils import poll from ert.config import QueueConfig from ert.scheduler import LsfDriver, create_driver +from ert.scheduler.driver import SIGNAL_OFFSET from ert.scheduler.lsf_driver import ( FLAKY_SSH_RETURNCODE, LSF_FAILED_JOB, @@ -33,6 +38,9 @@ parse_bhist, parse_bjobs, ) +from tests.utils import poll + +from .conftest import mock_bin valid_jobstates: Collection[str] = list(get_args(JobState)) @@ -214,29 +222,13 @@ async def test_submit_sets_stderr(): @pytest.mark.usefixtures("capturing_bsub") -async def test_submit_with_resource_requirement(): - driver = LsfDriver(resource_requirement="select[cs && x86_64Linux]") - await driver.submit(0, "sleep") - assert "-R select[cs && x86_64Linux]" in 
Path("captured_bsub_args").read_text( - encoding="utf-8" - ) - assert "hname" not in Path("captured_bsub_args").read_text(encoding="utf-8") - - -@pytest.mark.usefixtures("capturing_bsub") -async def test_submit_with_num_cpu(): - driver = LsfDriver() - await driver.submit(0, "sleep", num_cpu=4) - assert "-n 4" in Path("captured_bsub_args").read_text(encoding="utf-8") - - -@pytest.mark.usefixtures("capturing_bsub") -async def test_submit_with_realization_memory(): +async def test_submit_with_realization_memory_with_bsub_capture(): driver = LsfDriver() await driver.submit(0, "sleep", realization_memory=1024**2) assert "-R rusage[mem=1]" in Path("captured_bsub_args").read_text(encoding="utf-8") +@pytest.mark.integration_test @pytest.mark.parametrize( "bsub_script, expectation", [ @@ -922,3 +914,339 @@ async def test_kill_before_submit_logs_error(caplog): await driver.kill(0) assert "ERROR" in caplog.text assert "realization 0 has never been submitted" in caplog.text + + +@pytest.fixture(autouse=True) +def mock_lsf(pytestconfig, monkeypatch, tmp_path): + if pytestconfig.getoption("lsf"): + # User provided --lsf, which means we should use the actual LSF + # cluster without mocking anything."" + return + mock_bin(monkeypatch, tmp_path) + + +@pytest.fixture +def not_found_bjobs(monkeypatch, tmp_path): + """This creates a bjobs command that will always claim a job + does not exist, mimicking a job that has 'fallen out of the bjobs cache'.""" + os.chdir(tmp_path) + bin_path = tmp_path / "bin" + bin_path.mkdir() + monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") + bjobs_path = bin_path / "bjobs" + bjobs_path.write_text( + "#!/bin/sh\n" 'echo "Job <$1> is not found"', + encoding="utf-8", + ) + bjobs_path.chmod(bjobs_path.stat().st_mode | stat.S_IEXEC) + + +async def test_lsf_stdout_file(tmp_path, job_name): + os.chdir(tmp_path) + driver = LsfDriver() + await driver.submit(0, "sh", "-c", "echo yay", name=job_name) + await poll(driver, {0}) + lsf_stdout = Path(f"{job_name}.LSF-stdout").read_text(encoding="utf-8") + assert Path( + f"{job_name}.LSF-stdout" + ).exists(), "LSF system did not write output file" + + assert "Sender: " in lsf_stdout, "LSF stdout should always start with 'Sender:'" + assert "The output (if any) follows:" in lsf_stdout + assert "yay" in lsf_stdout + + +async def test_lsf_dumps_stderr_to_file(tmp_path, job_name): + os.chdir(tmp_path) + driver = LsfDriver() + failure_message = "failURE" + await driver.submit(0, "sh", "-c", f"echo {failure_message} >&2", name=job_name) + await poll(driver, {0}) + assert Path( + f"{job_name}.LSF-stderr" + ).exists(), "LSF system did not write stderr file" + + assert ( + Path(f"{job_name}.LSF-stderr").read_text(encoding="utf-8").strip() + == failure_message + ) + + +def generate_random_text(size): + letters = string.ascii_letters + return "".join(random.choice(letters) for i in range(size)) + + +@pytest.mark.parametrize("tail_chars_to_read", [(5), (50), (500), (700)]) +async def test_lsf_can_retrieve_stdout_and_stderr( + tmp_path, job_name, tail_chars_to_read +): + os.chdir(tmp_path) + driver = LsfDriver() + num_written_characters = 600 + _out = generate_random_text(num_written_characters) + _err = generate_random_text(num_written_characters) + await driver.submit(0, "sh", "-c", f"echo {_out} && echo {_err} >&2", name=job_name) + await poll(driver, {0}) + message = driver.read_stdout_and_stderr_files( + runpath=".", + job_name=job_name, + num_characters_to_read_from_end=tail_chars_to_read, + ) + + stderr_txt = 
Path(f"{job_name}.LSF-stderr").read_text(encoding="utf-8").strip()
+    stdout_txt = Path(f"{job_name}.LSF-stdout").read_text(encoding="utf-8").strip()
+
+    assert stderr_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message
+    assert stdout_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message
+
+
+async def test_lsf_cannot_retrieve_stdout_and_stderr(tmp_path, job_name):
+    os.chdir(tmp_path)
+    driver = LsfDriver()
+    num_written_characters = 600
+    _out = generate_random_text(num_written_characters)
+    _err = generate_random_text(num_written_characters)
+    await driver.submit(0, "sh", "-c", f"echo {_out} && echo {_err} >&2", name=job_name)
+    await poll(driver, {0})
+    # let's remove the output files
+    os.remove(job_name + ".LSF-stderr")
+    os.remove(job_name + ".LSF-stdout")
+    message = driver.read_stdout_and_stderr_files(
+        runpath=".",
+        job_name=job_name,
+        num_characters_to_read_from_end=1,
+    )
+    assert "LSF-stderr:\nNo output file" in message
+    assert "LSF-stdout:\nNo output file" in message
+
+
+@pytest.mark.parametrize("explicit_runpath", [(True), (False)])
+async def test_lsf_info_file_in_runpath(explicit_runpath, tmp_path, job_name):
+    os.chdir(tmp_path)
+    driver = LsfDriver()
+    (tmp_path / "some_runpath").mkdir()
+    os.chdir(tmp_path)
+    effective_runpath = tmp_path / "some_runpath" if explicit_runpath else tmp_path
+    await driver.submit(
+        0,
+        "sh",
+        "-c",
+        "exit 0",
+        runpath=tmp_path / "some_runpath" if explicit_runpath else None,
+        name=job_name,
+    )
+
+    await poll(driver, {0})
+
+    effective_runpath = tmp_path / "some_runpath" if explicit_runpath else tmp_path
+    assert json.loads(
+        (effective_runpath / "lsf_info.json").read_text(encoding="utf-8")
+    ).keys() == {"job_id"}
+
+
+@pytest.mark.integration_test
+async def test_submit_to_named_queue(tmp_path, caplog, job_name):
+    """If the environment variable _ERT_TESTS_ALTERNATIVE_QUEUE is defined,
+    a job will be submitted to that queue.
+ + As Ert does not keep track of which queue a job is executed in, we can only + test for success for the job.""" + os.chdir(tmp_path) + driver = LsfDriver(queue_name=os.getenv("_ERT_TESTS_ALTERNATIVE_QUEUE")) + await driver.submit(0, "sh", "-c", f"echo test > {tmp_path}/test", name=job_name) + await poll(driver, {0}) + + assert (tmp_path / "test").read_text(encoding="utf-8") == "test\n" + + +@pytest.mark.usefixtures("use_tmpdir") +async def test_submit_with_resource_requirement(job_name): + resource_requirement = "select[cs && x86_64Linux]" + driver = LsfDriver(resource_requirement=resource_requirement) + await driver.submit(0, "sh", "-c", "echo test>test", name=job_name) + await poll(driver, {0}) + + assert Path("test").read_text(encoding="utf-8") == "test\n" + + +@pytest.mark.usefixtures("capturing_bsub") +async def test_submit_with_resource_requirement_with_bsub_capture(): + driver = LsfDriver(resource_requirement="select[cs && x86_64Linux]") + await driver.submit(0, "sleep") + assert "-R select[cs && x86_64Linux]" in Path("captured_bsub_args").read_text( + encoding="utf-8" + ) + assert "hname" not in Path("captured_bsub_args").read_text(encoding="utf-8") + + +@pytest.mark.usefixtures("use_tmpdir") +async def test_submit_with_num_cpu(pytestconfig, job_name): + if not pytestconfig.getoption("lsf"): + return + + num_cpu = 2 + driver = LsfDriver() + await driver.submit(0, "sh", "-c", "echo test>test", name=job_name, num_cpu=num_cpu) + job_id = driver._iens2jobid[0] + await poll(driver, {0}) + + process = await asyncio.create_subprocess_exec( + "bhist", + "-l", + job_id, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + stdout_no_whitespaces = re.sub(r"\s+", "", stdout.decode()) + matches = re.search(r".*([0-9]+)ProcessorsRequested.*", stdout_no_whitespaces) + assert matches and matches[1] == str( + num_cpu + ), f"Could not verify processor allocation from stdout: {stdout}, stderr: {stderr}" + + assert Path("test").read_text(encoding="utf-8") == "test\n" + + +@pytest.mark.usefixtures("capturing_bsub") +async def test_submit_with_num_cpu_with_bsub_capture(): + driver = LsfDriver() + await driver.submit(0, "sleep", num_cpu=4) + assert "-n 4" in Path("captured_bsub_args").read_text(encoding="utf-8") + + +@pytest.mark.integration_test +@pytest.mark.usefixtures("use_tmpdir") +async def test_submit_with_realization_memory(pytestconfig, job_name): + if not pytestconfig.getoption("lsf"): + pytest.skip("Mocked LSF driver does not provide bhist") + + realization_memory_bytes = 1024 * 1024 + driver = LsfDriver() + await driver.submit( + 0, + "sh", + "-c", + "echo test>test", + name=job_name, + realization_memory=realization_memory_bytes, + ) + job_id = driver._iens2jobid[0] + await poll(driver, {0}) + + process = await asyncio.create_subprocess_exec( + "bhist", + "-l", + job_id, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, _ = await process.communicate() + assert "rusage[mem=1]" in stdout.decode(encoding="utf-8") + + assert Path("test").read_text(encoding="utf-8") == "test\n" + + +@pytest.mark.integration_test +async def test_polling_bhist_fallback(not_found_bjobs, caplog, job_name): + caplog.set_level(logging.DEBUG) + driver = LsfDriver() + Path("mock_jobs").mkdir() + Path("mock_jobs/pendingtimemillis").write_text("100", encoding="utf-8") + driver._poll_period = 0.01 + + bhist_called = False + original_bhist_method = driver._poll_once_by_bhist + + def mock_poll_once_by_bhist(*args, **kwargs): 
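+        # Record that the bhist fallback was exercised before delegating to
+        # the original poll method.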
+        nonlocal bhist_called
+        bhist_called = True
+        return original_bhist_method(*args, **kwargs)
+
+    driver._poll_once_by_bhist = mock_poll_once_by_bhist
+
+    await driver.submit(0, "sh", "-c", "sleep 1", name=job_name)
+    job_id = list(driver._iens2jobid.values())[0]
+    await poll(driver, {0})
+    assert "bhist is used" in caplog.text
+    assert bhist_called
+    assert driver._bhist_cache and job_id in driver._bhist_cache
+
+
+@pytest.mark.integration_test
+@pytest.mark.flaky(reruns=10)
+async def test_that_kill_before_submit_is_finished_works(
+    tmp_path, monkeypatch, caplog, pytestconfig
+):
+    """This test asserts that it is possible to issue a kill command
+    to a realization right after it has been submitted (as in driver.submit()).
+
+    The bug this test intends to catch is the driver giving up on the kill
+    while the submission is still in progress; it is important not to let the
+    realization slip through in that scenario.
+
+    The test design allows for much more flakiness than is probable in
+    reality, so reruns are permitted to make it pass.
+    """
+    os.chdir(tmp_path)
+
+    if pytestconfig.getoption("lsf"):
+        # Allow more time when tested on a real compute cluster to avoid false positives.
+        job_kill_window = 10
+        test_grace_time = 20
+    elif sys.platform.startswith("darwin"):
+        # Mitigate flakiness on low-power test nodes
+        job_kill_window = 5
+        test_grace_time = 10
+    else:
+        job_kill_window = 2
+        test_grace_time = 4
+
+    bin_path = tmp_path / "bin"
+    bin_path.mkdir()
+    monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}")
+    bsub_path = bin_path / "slow_bsub"
+    bsub_path.write_text(
+        "#!/bin/sh\nsleep 0.1\nbsub $@",
+        encoding="utf-8",
+    )
+    bsub_path.chmod(bsub_path.stat().st_mode | stat.S_IEXEC)
+
+    caplog.set_level(logging.DEBUG)
+    driver = LsfDriver(bsub_cmd="slow_bsub")
+
+    # Allow submit and kill to be interleaved by asyncio by issuing
+    # submit() in its own asyncio Task:
+    asyncio.create_task(
+        driver.submit(
+            # The sleep is the time window in which we can kill the job before
+            # the unwanted finish message appears on disk.
+            0,
+            "sh",
+            "-c",
+            f"sleep {job_kill_window}; touch {tmp_path}/survived",
+        )
+    )
+    await asyncio.sleep(0.01)  # Allow submit task to start executing
+    await driver.kill(0)  # This will wait until the submit is done and then kill
+
+    async def finished(iens: int, returncode: int):
+        SIGTERM = 15
+        assert iens == 0
+        # If the kill is issued before the job really starts, you will not
+        # get SIGTERM but rather LSF_FAILED_JOB. Whether SIGNAL_OFFSET is
+        # added or not depends on various shell configurations and is a
+        # detail we do not want to track.
+        assert returncode in (SIGTERM, SIGNAL_OFFSET + SIGTERM, LSF_FAILED_JOB)
+
+    await poll(driver, {0}, finished=finished)
+    assert "ERROR" not in str(caplog.text)
+
+    # In case the return value of the killed job is correct but the submitted
+    # shell script is still running for whatever reason, a file called
+    # "survived" will appear on disk. Wait for it, and then ensure it is not
+    # there.
+ assert test_grace_time > job_kill_window, "Wrong test setup" + await asyncio.sleep(test_grace_time) + assert not Path( + "survived" + ).exists(), "The process children of the job should also have been killed" diff --git a/tests/unit_tests/scheduler/test_openpbs_driver.py b/tests/unit_tests/scheduler/test_openpbs_driver.py index 2ad8821c1a2..82dc0b81046 100644 --- a/tests/unit_tests/scheduler/test_openpbs_driver.py +++ b/tests/unit_tests/scheduler/test_openpbs_driver.py @@ -4,6 +4,7 @@ import os import shlex import stat +from functools import partial from pathlib import Path from textwrap import dedent from typing import Dict, List @@ -11,10 +12,9 @@ import pytest from hypothesis import given from hypothesis import strategies as st -from tests.conftest import QSTAT_HEADER, QSTAT_HEADER_FORMAT -from tests.utils import poll -from ert.scheduler import OpenPBSDriver +from ert.cli.main import ErtCliError +from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE from ert.scheduler.openpbs_driver import ( JOB_STATES, QDEL_JOB_HAS_FINISHED, @@ -24,12 +24,18 @@ QSUB_PREMATURE_END_OF_MESSAGE, FinishedEvent, FinishedJob, + OpenPBSDriver, QueuedJob, RunningJob, StartedEvent, _create_job_class, _parse_jobs_dict, ) +from tests.conftest import QSTAT_HEADER, QSTAT_HEADER_FORMAT +from tests.ui_tests.cli.run_cli import run_cli +from tests.utils import poll + +from .conftest import mock_bin @given(st.lists(st.sampled_from(JOB_STATES))) @@ -576,3 +582,61 @@ async def test_submit_project_code(): assert f" -A {project_code} " in Path("captured_qsub_args").read_text( encoding="utf-8" ) + + +@pytest.fixture(autouse=True) +def mock_openpbs(pytestconfig, monkeypatch, tmp_path): + if pytestconfig.getoption("openpbs"): + # User provided --openpbs, which means we should use the actual OpenPBS + # cluster without mocking anything. 
+ return + mock_bin(monkeypatch, tmp_path) + + +@pytest.fixture() +def queue_name_config(): + if queue_name := os.getenv("_ERT_TESTS_DEFAULT_QUEUE_NAME"): + return f"\nQUEUE_OPTION TORQUE QUEUE {queue_name}" + return "" + + +async def mock_failure(message, *args, **kwargs): + raise RuntimeError(message) + + +@pytest.mark.integration_test +@pytest.mark.usefixtures("copy_poly_case") +def test_openpbs_driver_with_poly_example_failing_submit_fails_ert_and_propagates_exception_to_user( + monkeypatch, caplog, queue_name_config +): + monkeypatch.setattr( + OpenPBSDriver, "submit", partial(mock_failure, "Submit job failed") + ) + with open("poly.ert", mode="a+", encoding="utf-8") as f: + f.write("QUEUE_SYSTEM TORQUE\nNUM_REALIZATIONS 2") + f.write(queue_name_config) + with pytest.raises(ErtCliError): + run_cli( + ENSEMBLE_EXPERIMENT_MODE, + "poly.ert", + ) + assert "RuntimeError: Submit job failed" in caplog.text + + +@pytest.mark.integration_test +@pytest.mark.usefixtures("copy_poly_case") +def test_openpbs_driver_with_poly_example_failing_poll_fails_ert_and_propagates_exception_to_user( + monkeypatch, caplog, queue_name_config +): + monkeypatch.setattr( + OpenPBSDriver, "poll", partial(mock_failure, "Status polling failed") + ) + with open("poly.ert", mode="a+", encoding="utf-8") as f: + f.write("QUEUE_SYSTEM TORQUE\nNUM_REALIZATIONS 2") + f.write(queue_name_config) + with pytest.raises(ErtCliError): + run_cli( + ENSEMBLE_EXPERIMENT_MODE, + "poly.ert", + ) + assert "RuntimeError: Status polling failed" in caplog.text diff --git a/tests/unit_tests/scheduler/test_slurm_driver.py b/tests/unit_tests/scheduler/test_slurm_driver.py index 3f104afb012..bf920a0e67c 100644 --- a/tests/unit_tests/scheduler/test_slurm_driver.py +++ b/tests/unit_tests/scheduler/test_slurm_driver.py @@ -1,5 +1,10 @@ +import asyncio +import logging import os +import random import stat +import string +import sys from contextlib import ExitStack as does_not_raise from pathlib import Path @@ -8,6 +13,9 @@ from hypothesis import strategies as st from ert.scheduler import SlurmDriver +from tests.utils import poll + +from .conftest import mock_bin def nonempty_string_without_whitespace(): @@ -173,6 +181,7 @@ async def test_max_runtime_is_set(max_runtime): ) +@pytest.mark.integration_test @pytest.mark.parametrize( "sbatch_script, expectation", [ @@ -232,3 +241,173 @@ async def test_kill_before_submit_logs_error(caplog): await driver.kill(0) assert "ERROR" in caplog.text assert "realization 0 has never been submitted" in caplog.text + + +@pytest.fixture(autouse=True) +def mock_slurm(pytestconfig, monkeypatch, tmp_path): + if pytestconfig.getoption("slurm"): + # User provided --slurm, which means we should use an actual Slurm + # cluster without mocking anything."" + return + mock_bin(monkeypatch, tmp_path) + + +async def test_slurm_stdout_file(tmp_path, job_name): + os.chdir(tmp_path) + driver = SlurmDriver() + await driver.submit(0, "sh", "-c", "echo yay", name=job_name) + await poll(driver, {0}) + slurm_stdout = Path(f"{job_name}.stdout").read_text(encoding="utf-8") + assert Path(f"{job_name}.stdout").exists(), "Slurm system did not write output file" + assert "yay" in slurm_stdout + + +async def test_slurm_dumps_stderr_to_file(tmp_path, job_name): + os.chdir(tmp_path) + driver = SlurmDriver() + failure_message = "failURE" + await driver.submit(0, "sh", "-c", f"echo {failure_message} >&2", name=job_name) + await poll(driver, {0}) + assert Path(f"{job_name}.stderr").exists(), "Slurm system did not write stderr file" + + assert ( + 
Path(f"{job_name}.stderr").read_text(encoding="utf-8").strip() + == failure_message + ) + + +def generate_random_text(size): + letters = string.ascii_letters + return "".join(random.choice(letters) for _ in range(size)) + + +@pytest.mark.parametrize("tail_chars_to_read", [(5), (50), (500), (700)]) +async def test_slurm_can_retrieve_stdout_and_stderr( + tmp_path, job_name, tail_chars_to_read +): + os.chdir(tmp_path) + driver = SlurmDriver() + num_written_characters = 600 + _out = generate_random_text(num_written_characters) + _err = generate_random_text(num_written_characters) + await driver.submit(0, "sh", "-c", f"echo {_out} && echo {_err} >&2", name=job_name) + await poll(driver, {0}) + message = driver.read_stdout_and_stderr_files( + runpath=".", + job_name=job_name, + num_characters_to_read_from_end=tail_chars_to_read, + ) + + stderr_txt = Path(f"{job_name}.stderr").read_text(encoding="utf-8").strip() + stdout_txt = Path(f"{job_name}.stdout").read_text(encoding="utf-8").strip() + + assert stderr_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message + assert stdout_txt[-min(tail_chars_to_read, num_written_characters) + 2 :] in message + + +@pytest.mark.integration_test +async def test_submit_to_named_queue(tmp_path, job_name): + """If the environment variable _ERT_TEST_ALTERNATIVE_QUEUE is defined + a job will be attempted submitted to that queue. + + * Note that what is called a "queue" in Ert is a "partition" in Slurm lingo. + + As Ert does not keep track of which queue a job is executed in, we can only + test for success for the job.""" + os.chdir(tmp_path) + driver = SlurmDriver(queue_name=os.getenv("_ERT_TESTS_ALTERNATIVE_QUEUE")) + await driver.submit(0, "sh", "-c", f"echo test > {tmp_path}/test", name=job_name) + await poll(driver, {0}) + + assert (tmp_path / "test").read_text(encoding="utf-8") == "test\n" + + +@pytest.mark.usefixtures("use_tmpdir") +async def test_submit_with_num_cpu(pytestconfig, job_name): + if not pytestconfig.getoption("slurm"): + return + + num_cpu = 2 + driver = SlurmDriver() + await driver.submit(0, "sh", "-c", "echo test>test", name=job_name, num_cpu=num_cpu) + job_id = driver._iens2jobid[0] + await poll(driver, {0}) + + process = await asyncio.create_subprocess_exec( + "scontrol", + "show", + "job", + job_id, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + stdout, stderr = await process.communicate() + assert " NumCPUs=2 " in stdout.decode( + errors="ignore" + ), f"Could not verify processor allocation from stdout: {stdout}, stderr: {stderr}" + + assert Path("test").read_text(encoding="utf-8") == "test\n" + + +@pytest.mark.flaky(reruns=3) +async def test_kill_before_submit_is_finished( + tmp_path, monkeypatch, caplog, pytestconfig +): + os.chdir(tmp_path) + + if pytestconfig.getoption("slurm"): + # Allow more time when tested on a real compute cluster to avoid false positives. 
+ job_kill_window = 5 + test_grace_time = 10 + elif sys.platform.startswith("darwin"): + # Mitigate flakiness on low-power test nodes + job_kill_window = 5 + test_grace_time = 10 + else: + job_kill_window = 1 + test_grace_time = 2 + + bin_path = tmp_path / "bin" + bin_path.mkdir() + monkeypatch.setenv("PATH", f"{bin_path}:{os.environ['PATH']}") + sbatch_path = bin_path / "slow_sbatch" + sbatch_path.write_text( + "#!/bin/sh\nsleep 0.1\nsbatch $@", + encoding="utf-8", + ) + sbatch_path.chmod(sbatch_path.stat().st_mode | stat.S_IEXEC) + + caplog.set_level(logging.DEBUG) + driver = SlurmDriver(sbatch_cmd="slow_sbatch") + + # Allow submit and kill to be interleaved by asyncio by issuing + # submit() in its own asyncio Task: + asyncio.create_task( + driver.submit( + # The sleep is the time window in which we can kill the job before + # the unwanted finish message appears on disk. + 0, + "sh", + "-c", + f"sleep {job_kill_window}; touch {tmp_path}/survived", + ) + ) + await asyncio.sleep(0.01) # Allow submit task to start executing + await driver.kill(0) # This will wait until the submit is done and then kill + + async def finished(iens: int, returncode: int): + assert iens == 0 + # Slurm assigns returncode 0 even when they are killed. + assert returncode == 0 + + await poll(driver, {0}, finished=finished) + + # In case the return value of the killed job is correct but the submitted + # shell script is still running for whatever reason, a file called + # "survived" will appear on disk. Wait for it, and then ensure it is not + # there. + assert test_grace_time > job_kill_window, "Wrong test setup" + await asyncio.sleep(test_grace_time) + assert not Path( + "survived" + ).exists(), "The process children of the job should also have been killed" diff --git a/tests/unit_tests/services/test_base_service.py b/tests/unit_tests/services/test_base_service.py index b0191dd6e0a..aced4fc6d23 100644 --- a/tests/unit_tests/services/test_base_service.py +++ b/tests/unit_tests/services/test_base_service.py @@ -122,6 +122,7 @@ def test_long_lived(server, tmp_path): assert not (tmp_path / "dummy_server.json").exists() +@pytest.mark.integration_test @pytest.mark.script( """\ time.sleep(30) @@ -160,6 +161,7 @@ def test_json_created(server): assert f.read() +@pytest.mark.integration_test @pytest.mark.script( """\ os.write(fd, b'{"authtoken": "test123", "urls": ["url"]}') @@ -192,6 +194,7 @@ def test_singleton_start(server_script, tmp_path): assert not (tmp_path / "dummy_server.json").exists() +@pytest.mark.integration_test @pytest.mark.script( """\ time.sleep(1) @@ -205,6 +208,7 @@ def test_singleton_connect(server_script): assert server is client +@pytest.mark.integration_test @pytest.mark.script( """\ os.write(fd, b'{"authtoken": "test123", "urls": ["url"]}') @@ -247,6 +251,7 @@ def run(self): assert not (tmp_path / "dummy_server.json").exists() +@pytest.mark.integration_test @pytest.mark.script( """\ os.write(fd, b'{"authtoken": "test123", "urls": ["url"]}') diff --git a/tests/unit_tests/shared/test_port_handler.py b/tests/unit_tests/shared/test_port_handler.py index 9081b2292a1..b06a41d861b 100644 --- a/tests/unit_tests/shared/test_port_handler.py +++ b/tests/unit_tests/shared/test_port_handler.py @@ -384,6 +384,7 @@ def test_def_active_live_nok_nok(unused_tcp_port): ) +@pytest.mark.integration_test @pytest.mark.skipif( not sys.platform.startswith("darwin"), reason="MacOS-specific socket behaviour" ) @@ -430,6 +431,7 @@ def test_def_active_close_macos_nok_ok(unused_tcp_port): assert sock.fileno() != -1 
+@pytest.mark.integration_test @pytest.mark.skipif( not sys.platform.startswith("linux"), reason="Linux-specific socket behaviour" ) @@ -472,6 +474,7 @@ def test_def_active_close_linux_nok_nok(unused_tcp_port): ) +@pytest.mark.integration_test @pytest.mark.skipif( not sys.platform.startswith("darwin"), reason="MacOS-specific socket behaviour" ) diff --git a/tests/unit_tests/simulator/test_batch_sim.py b/tests/unit_tests/simulator/test_batch_sim.py index 281f3668c14..b8f7cd33516 100644 --- a/tests/unit_tests/simulator/test_batch_sim.py +++ b/tests/unit_tests/simulator/test_batch_sim.py @@ -146,6 +146,7 @@ def test_that_starting_with_invalid_key_raises_key_error( batch_simulator.start("case", _input, storage) +@pytest.mark.integration_test def test_batch_simulation(batch_simulator, storage): # Starting a simulation which should actually run through. case_data = [ @@ -283,6 +284,7 @@ def test_that_batch_simulator_handles_invalid_suffixes_at_start( rsim.start("case", inp, storage) +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") def test_batch_simulation_suffixes(batch_sim_example, storage): ert_config = batch_sim_example @@ -434,6 +436,7 @@ def assertContextStatusOddFailures(batch_ctx: BatchContext, final_state_only=Fal assert status == JobState.FAILED +@pytest.mark.integration_test def test_batch_ctx_status_failing_jobs(setup_case, storage): ert_config = setup_case("batch_sim", "batch_sim_sleep_and_fail.ert") diff --git a/tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/observations/FWPR b/tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/observations/FWPR similarity index 100% rename from tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/observations/FWPR rename to tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/observations/FWPR diff --git a/tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/observations/GEN b/tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/observations/GEN similarity index 100% rename from tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/observations/GEN rename to tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/observations/GEN diff --git a/tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/parameters b/tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/parameters similarity index 100% rename from tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/parameters rename to tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/parameters diff --git a/tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/responses b/tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/responses similarity index 100% rename from tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/responses rename to tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/responses diff --git a/tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/summary_data b/tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data similarity index 100% rename from 
tests/integration_tests/snapshots/test_storage_migration/test_that_storage_matches/summary_data rename to tests/unit_tests/storage/snapshots/test_storage_migration/test_that_storage_matches/summary_data diff --git a/tests/unit_tests/storage/test_local_storage.py b/tests/unit_tests/storage/test_local_storage.py index 656563bf62e..abd594a0629 100644 --- a/tests/unit_tests/storage/test_local_storage.py +++ b/tests/unit_tests/storage/test_local_storage.py @@ -724,4 +724,4 @@ def teardown(self): shutil.rmtree(self.tmpdir) -TestStorage = StatefulStorageTest.TestCase +TestStorage = pytest.mark.integration_test(StatefulStorageTest.TestCase) diff --git a/tests/integration_tests/test_storage_migration.py b/tests/unit_tests/storage/test_storage_migration.py similarity index 99% rename from tests/integration_tests/test_storage_migration.py rename to tests/unit_tests/storage/test_storage_migration.py index d7c1412d55a..456164e6f7a 100644 --- a/tests/integration_tests/test_storage_migration.py +++ b/tests/unit_tests/storage/test_storage_migration.py @@ -25,6 +25,7 @@ def copy_shared(tmp_path, block_storage_path): ) +@pytest.mark.integration_test @pytest.mark.usefixtures("copy_shared") @pytest.mark.parametrize( "ert_version", @@ -176,6 +177,7 @@ def test_that_storage_matches( ) +@pytest.mark.integration_test @pytest.mark.usefixtures("copy_shared") @pytest.mark.parametrize( "ert_version", diff --git a/tests/integration_tests/status/test_tracking_integration.py b/tests/unit_tests/test_tracking.py similarity index 100% rename from tests/integration_tests/status/test_tracking_integration.py rename to tests/unit_tests/test_tracking.py diff --git a/tests/unit_tests/workflow_runner/test_workflow_runner.py b/tests/unit_tests/workflow_runner/test_workflow_runner.py index 9b166986ad6..800ecf04cc1 100644 --- a/tests/unit_tests/workflow_runner/test_workflow_runner.py +++ b/tests/unit_tests/workflow_runner/test_workflow_runner.py @@ -11,6 +11,7 @@ from .workflow_common import WorkflowCommon +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") def test_workflow_thread_cancel_ert_script(): WorkflowCommon.createWaitJob() @@ -104,6 +105,7 @@ def test_workflow_failed_job(): assert workflow_runner.exception() is not None +@pytest.mark.integration_test @pytest.mark.usefixtures("use_tmpdir") def test_workflow_success(): WorkflowCommon.createWaitJob() From 41bae7549ac85b126aff23ea30de253f32f08396 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 13:39:55 +0200 Subject: [PATCH 04/11] Adjust test workflows to new categories This removes duplication by merging in coverage generation and doctest into the same workflow --- .github/workflows/benchmark.yml | 2 +- .github/workflows/build_and_test.yml | 6 +- .github/workflows/coverage.yml | 80 ----------------------- .github/workflows/doctest.yml | 65 ------------------ .github/workflows/test_ert.yml | 38 +++++++++-- .github/workflows/test_ert_with_slurm.yml | 2 +- ci/testkomodo.sh | 2 +- codecov.yml | 2 +- 8 files changed, 38 insertions(+), 159 deletions(-) delete mode 100644 .github/workflows/coverage.yml delete mode 100644 .github/workflows/doctest.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 28845d1e04b..f71887a5829 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -37,7 +37,7 @@ jobs: - name: Run benchmark run: | - pytest tests/unit_tests/analysis/test_es_update.py::test_and_benchmark_adaptive_localization_with_fields --benchmark-json output.json + pytest 
tests/performance_tests/test_analysis.py::test_and_benchmark_adaptive_localization_with_fields --benchmark-json output.json - name: Store benchmark result uses: benchmark-action/github-action-benchmark@v1 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index bfd26c58d5e..d0cf52f6760 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -33,7 +33,7 @@ jobs: strategy: fail-fast: false matrix: - test-type: [ 'integration-tests', 'unit-tests', 'gui-test' ] + test-type: [ 'performance-tests', 'unit-tests', 'ui-tests' ] python-version: [ '3.8', '3.11', '3.12' ] os: [ ubuntu-latest ] uses: ./.github/workflows/test_ert.yml @@ -58,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - test-type: [ 'integration-tests', 'unit-tests', 'gui-test' ] + test-type: [ 'performance-tests', 'unit-tests', 'ui-tests' ] python-version: [ '3.8', '3.12' ] os: [ 'macos-13', 'macos-14', 'macos-14-large'] exclude: @@ -80,7 +80,7 @@ jobs: strategy: fail-fast: false matrix: - test-type: [ 'integration-tests', 'unit-tests', 'gui-test' ] + test-type: [ 'performance-tests', 'unit-tests', 'ui-tests' ] python-version: [ '3.12' ] os: [ 'macos-latest' ] uses: ./.github/workflows/test_ert.yml diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index 74ba4d2b68f..00000000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Python coverage - -on: - push: - branches: - - main - - 'version-**' - tags: "*" - pull_request: - -env: - UV_SYSTEM_PYTHON: 1 - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - -jobs: - python-test-coverage: - name: Python Coverage - timeout-minutes: 40 - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - test-type: ['integration-tests', 'unit-tests', 'gui-tests'] - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: true - lfs: true - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Install uv - run: pip install uv - - - name: Install with dependencies - run: | - uv pip install ".[dev]" - - - name: Test GUI - if: matrix.test-type == 'gui-tests' - run: | - pytest tests/ --cov=ert -m "requires_window_manager" --cov-report=xml:cov.xml -v - - - name: Test Integration - if: matrix.test-type == 'integration-tests' - run: | - pytest tests/ -n logical --cov=ert -m "integration_test" --cov-report=xml:cov.xml - - - name: Test units - if: matrix.test-type == 'unit-tests' - run: | - pytest tests/unit_tests -n logical --cov=ert -m "not integration_test and not requires_window_manager" --cov-report=xml:cov.xml --dist loadgroup - - - name: Upload python coverage to Codecov - uses: codecov/codecov-action@v4 - id: codecov1 - continue-on-error: true - with: - token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: true - files: cov.xml - flags: ${{ matrix.test-type }} - - name: codecov retry sleep - if: steps.codecov1.outcome == 'failure' - run: | - sleep 30 - - name: Codecov retry - uses: codecov/codecov-action@v4 - if: steps.codecov1.outcome == 'failure' - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: cov.xml - fail_ci_if_error: ${{ github.ref == 'refs/heads/main' }} diff --git a/.github/workflows/doctest.yml b/.github/workflows/doctest.yml deleted file mode 100644 index 92c53c59440..00000000000 --- a/.github/workflows/doctest.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: Python doctest - -on: - 
push: - branches: - - main - - 'version-**' - tags: "*" - pull_request: - -env: - UV_SYSTEM_PYTHON: 1 - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} - -jobs: - python-doctest: - name: Set up Python - timeout-minutes: 40 - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ['3.12'] - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - id: setup_python - with: - python-version: ${{ matrix.python-version }} - - - name: Install uv - run: pip install uv - - - run: | - uv pip install -e ".[dev]" - - - name: Test doctest - run: | - # dark storage assumes it is a started service so cannot be excluded - # by pytest blindly - pytest --doctest-modules --cov=ert --cov-report=xml:cov.xml src/ --ignore src/ert/dark_storage - - - name: Upload coverage to Codecov - id: codecov1 - uses: codecov/codecov-action@v4 - continue-on-error: true - with: - token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: true - files: cov.xml - - name: codecov retry sleep - if: steps.codecov1.outcome == 'failure' - run: | - sleep 30 - - name: Codecov retry - uses: codecov/codecov-action@v4 - if: steps.codecov1.outcome == 'failure' - with: - token: ${{ secrets.CODECOV_TOKEN }} - files: cov.xml - fail_ci_if_error: ${{ github.ref == 'refs/heads/main' }} diff --git a/.github/workflows/test_ert.yml b/.github/workflows/test_ert.yml index d94b3fffa7e..2f74997ed39 100644 --- a/.github/workflows/test_ert.yml +++ b/.github/workflows/test_ert.yml @@ -42,20 +42,22 @@ jobs: run: | uv pip install ".[dev]" - - name: Test GUI - if: inputs.test-type == 'gui-test' + - name: UI Test + if: inputs.test-type == 'ui-tests' run: | - pytest tests --junit-xml=junit.xml -v --mpl -m "requires_window_manager" --benchmark-disable + pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -v --mpl --benchmark-disable tests/ui_tests/gui + pytest --cov=ert --cov-report=xml:cov2.xml --junit-xml=junit.xml -n logical -v --benchmark-disable --dist loadgroup tests/ui_tests/cli - name: Unit Test if: inputs.test-type == 'unit-tests' run: | - pytest tests --junit-xml=junit.xml -n logical --show-capture=stderr -v -m "not integration_test and not requires_window_manager" --benchmark-disable --dist loadgroup + pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -n logical --show-capture=stderr -v --benchmark-disable --dist loadgroup tests/unit_tests + pytest --doctest-modules --cov=ert --cov-report=xml:cov2.xml src/ --ignore src/ert/dark_storage - - name: Integration Test - if: inputs.test-type == 'integration-tests' + - name: Performance Test + if: inputs.test-type == 'performance-tests' run: | - pytest tests --junit-xml=junit.xml -n logical --show-capture=stderr -v -m "integration_test and not requires_window_manager" --benchmark-disable + pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -n logical --show-capture=stderr -v --benchmark-disable --dist loadgroup tests/performance_tests - name: Test for a clean repository run: | @@ -63,6 +65,28 @@ jobs: git status --porcelain | sed '/ert.*.whl$\|\/block_storage$/d' test -z "$(git status --porcelain | sed '/ert.*.whl$\\|\\/block_storage$/d')" + - name: Upload coverage to Codecov + id: codecov1 + uses: codecov/codecov-action@v4 + continue-on-error: true + with: + token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: true + files: cov1.xml,cov2.xml + flags: ${{ inputs.test-type }} + - name: codecov retry sleep + if: 
steps.codecov1.outcome == 'failure' + run: | + sleep 30 + - name: Codecov retry + uses: codecov/codecov-action@v4 + if: steps.codecov1.outcome == 'failure' + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: cov1.xml,cov2.xml + flags: ${{ inputs.test-type }} + fail_ci_if_error: ${{ github.ref == 'refs/heads/main' }} + - uses: test-summary/action@v2 continue-on-error: true with: diff --git a/.github/workflows/test_ert_with_slurm.yml b/.github/workflows/test_ert_with_slurm.yml index 925602b9cb8..653029c4acd 100644 --- a/.github/workflows/test_ert_with_slurm.yml +++ b/.github/workflows/test_ert_with_slurm.yml @@ -69,7 +69,7 @@ jobs: run: | set -e export _ERT_TESTS_ALTERNATIVE_QUEUE=AlternativeQ - pytest tests/integration_tests/scheduler --slurm + pytest tests/unit_tests/scheduler --slurm - name: Test poly-example on slurm run: | diff --git a/ci/testkomodo.sh b/ci/testkomodo.sh index 4ab0efedd68..3d296c352b1 100755 --- a/ci/testkomodo.sh +++ b/ci/testkomodo.sh @@ -53,7 +53,7 @@ start_tests () { unset OMP_NUM_THREADS basetemp=$(mktemp -d -p $_ERT_TESTS_SHARED_TMP) - pytest --timeout=3600 -v --$_ERT_TESTS_QUEUE_SYSTEM --basetemp="$basetemp" integration_tests/scheduler + pytest --timeout=3600 -v --$_ERT_TESTS_QUEUE_SYSTEM --basetemp="$basetemp" unit_tests/scheduler rm -rf "$basetemp" || true popd diff --git a/codecov.yml b/codecov.yml index 9bdf1bdd7e1..c2f624c8dfe 100644 --- a/codecov.yml +++ b/codecov.yml @@ -4,4 +4,4 @@ fixes: comment: # The code coverage is made up of 4 test runs so only after all coverage # reports have been uploaded will the comparison be sane - after_n_builds: 4 + after_n_builds: 12 From 0497c30dd698c7c6b63b76f04c87ba3483736fab Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 14:23:14 +0200 Subject: [PATCH 05/11] Update documentation with test categories --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- CONTRIBUTING.md | 42 ++++++++++++++++++++------------ README.md | 21 ++++++++++++++++ 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 148f7c604c3..d35a5f15889 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,7 +11,8 @@ _Short description of the approach_ - [ ] PR title captures the intent of the changes, and is fitting for release notes. - [ ] Added appropriate release note label - [ ] Commit history is consistent and clean, in line with the [contribution guidelines](https://github.com/equinor/ert/blob/main/CONTRIBUTING.md). -- [ ] Make sure tests pass locally (after every commit!) +- [ ] Make sure unit tests pass locally after every commit (`git rebase -i 10 + --exec 'pytest tests/unit_tests -n logical -m "not integration_test"'`) ## When applicable - [ ] **When there are user facing changes**: Updated documentation diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 72c39d2c4b1..f8e64980a0f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,7 +6,32 @@ The following is a set of guidelines for contributing to ERT. 1. Automatic code formatting is applied via pre-commit hooks. You can see how to set that up [here](https://pre-commit.com/). -1. All code must be testable and unit tested. +2. All code must be testable and unit tested. + +## Test categories + +Tests that are in the `tests/unit_tests` directory and are +not marked with `integration_test` are ment to be exceptionally +fast and reliable. This is so that one can run those while +iterating on the code. 
This means special care has to
+be taken when placing tests here.
+
+### Integration tests
+
+By "integration test" we simply mean unit tests that did not quite make the
+cut, either because they are too slow, too unreliable, have error messages
+that are difficult to understand, etc.
+
+### UI tests
+
+These tests are meant to exercise behavior from a user-interaction point of view,
+ensuring that the application behaves the way the user expects independently
+of code changes. We have two user interfaces, the CLI and the GUI, so these
+tests live in corresponding subdirectories.
+
+### Performance tests
+
+Tests that ensure runtime and memory performance do not degrade.

 ## Commits

@@ -63,18 +88,3 @@ noise in the review process.
 * rebase onto base branch if necessary,
 * squash whatever still needs squashing, and
 * [fast-forward](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/about-protected-branches#require-linear-history) merge.
-
-### Build documentation
-
-You can build the documentation after installation by running
-```bash
-pip install ".[dev]"
-sphinx-build -n -v -E -W ./docs ./tmp/ert_docs
-```
-and then open the generated `./tmp/ert_docs/index.html` in a browser.
-
-To automatically reload on changes you may use
-
-```bash
-sphinx-autobuild docs docs/_build/html
-```
diff --git a/README.md b/README.md
index 8c39e257e35..e1555869113 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,13 @@ pip install -e ".[dev]"
 pytest tests/
 ```

+There are many kinds of tests in the `tests` directory. While iterating on your
+code, you can run a fast subset of the tests with
+
+```sh
+pytest -n logical tests/unit_tests -m "not integration_test"
+```
+
 [Git LFS](https://git-lfs.com/) must be installed to get all the files. This is packaged as `git-lfs` on Ubuntu, Fedora or macOS Homebrew. For Equinor RGS node users, it is possible to use `git` from Red Hat Software Collections:
 ```sh
 source /opt/rh/rh-git227/enable
@@ -75,6 +82,20 @@ If you checked out submodules without having git lfs installed, you can force gi
 git submodule foreach "git lfs pull"
 ```

+### Build documentation
+
+You can build the documentation after installation by running
+```bash
+pip install ".[dev]"
+sphinx-build -n -v -E -W ./docs ./tmp/ert_docs
+```
+and then open the generated `./tmp/ert_docs/index.html` in a browser.
+ +To automatically reload on changes you may use + +```bash +sphinx-autobuild docs docs/_build/html +``` ### Style requirements From 3fa28b13d6416594f4209dee2ddd3c9ec4be4013 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 19:27:04 +0200 Subject: [PATCH 06/11] Remove integration_test markers in ui_tests --- .../cli/analysis/test_adaptive_localization.py | 3 --- tests/ui_tests/cli/analysis/test_es_update.py | 2 -- tests/ui_tests/cli/test_cli.py | 11 ----------- tests/ui_tests/cli/test_field_parameter.py | 3 --- tests/ui_tests/cli/test_local_driver.py | 3 --- tests/ui_tests/cli/test_parameter_sample_types.py | 2 -- tests/ui_tests/cli/test_shell.py | 3 --- 7 files changed, 27 deletions(-) diff --git a/tests/ui_tests/cli/analysis/test_adaptive_localization.py b/tests/ui_tests/cli/analysis/test_adaptive_localization.py index 70bfa515dd4..378bfe87f69 100644 --- a/tests/ui_tests/cli/analysis/test_adaptive_localization.py +++ b/tests/ui_tests/cli/analysis/test_adaptive_localization.py @@ -32,7 +32,6 @@ def run_cli_ES_with_case(poly_config): return prior_ensemble, posterior_ensemble -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_that_adaptive_localization_with_cutoff_1_equals_ensemble_prior(): set_adaptive_localization_1 = dedent( @@ -225,7 +224,6 @@ def _evaluate(coeffs, x): assert set_of_records_from_xr == expected_records -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_that_adaptive_localization_with_cutoff_0_equals_ESupdate(): """ @@ -263,7 +261,6 @@ def test_that_adaptive_localization_with_cutoff_0_equals_ESupdate(): assert np.allclose(posterior_sample_loc0, posterior_sample_noloc) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_that_posterior_generalized_variance_increases_in_cutoff(): rng = np.random.default_rng(42) diff --git a/tests/ui_tests/cli/analysis/test_es_update.py b/tests/ui_tests/cli/analysis/test_es_update.py index fe583b2cf6c..76f3afeb371 100644 --- a/tests/ui_tests/cli/analysis/test_es_update.py +++ b/tests/ui_tests/cli/analysis/test_es_update.py @@ -52,7 +52,6 @@ def test_that_posterior_has_lower_variance_than_prior(): ) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_snake_oil_field") def test_that_surfaces_retain_their_order_when_loaded_and_saved_by_ert(): """This is a regression test to make sure ert does not use the wrong order @@ -128,7 +127,6 @@ def sample_prior(nx, ny): ) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_snake_oil_field") def test_update_multiple_param(): run_cli( diff --git a/tests/ui_tests/cli/test_cli.py b/tests/ui_tests/cli/test_cli.py index 9d0ef8a833e..f57dcc3146f 100644 --- a/tests/ui_tests/cli/test_cli.py +++ b/tests/ui_tests/cli/test_cli.py @@ -549,7 +549,6 @@ def fixture_mock_cli_run(monkeypatch): yield mocked_monitor, mocked_thread_join, mocked_thread_start -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_ensemble_evaluator(): run_cli( @@ -564,7 +563,6 @@ def test_ensemble_evaluator(): @pytest.mark.usefixtures("copy_poly_case") -@pytest.mark.integration_test def test_es_mda(snapshot): with fileinput.input("poly.ert", inplace=True) as fin: for line_nr, line in enumerate(fin): @@ -612,7 +610,6 @@ def test_es_mda(snapshot): pytest.param(ES_MDA_MODE, "iter-%d", id=f"{ES_MDA_MODE}"), ], ) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_cli_does_not_run_without_observations(mode, target): def remove_linestartswith(file_name: str, 
startswith: str): @@ -627,7 +624,6 @@ def remove_linestartswith(file_name: str, startswith: str): run_cli(mode, "--disable-monitor", "--target-case", target, "poly.ert") -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_ensemble_evaluator_disable_monitoring(): run_cli( @@ -639,7 +635,6 @@ def test_ensemble_evaluator_disable_monitoring(): ) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_cli_test_run(mock_cli_run): run_cli(TEST_RUN_MODE, "--disable-monitor", "poly.ert") @@ -650,7 +645,6 @@ def test_cli_test_run(mock_cli_run): thread_start_mock.assert_has_calls([[call(), call()]]) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_ies(): run_cli( @@ -664,7 +658,6 @@ def test_ies(): ) -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_that_running_ies_with_different_steplength_produces_different_result(): """This is a regression test to make sure that different step-lengths @@ -735,7 +728,6 @@ def _run(target, experiment_name): assert not np.isclose(result_1.loc["iter-1"], result_2.loc["iter-1"]).all() -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") @pytest.mark.parametrize( "prior_mask,reals_rerun_option", @@ -785,7 +777,6 @@ def test_that_prior_is_not_overwritten_in_ensemble_experiment( assert len([msg for msg in caplog.messages if "RANDOM_SEED" in msg]) == 1 -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_failing_job_cli_error_message(): # modify poly_eval.py @@ -809,7 +800,6 @@ def test_failing_job_cli_error_message(): pytest.fail(msg="Expected run cli to raise ErtCliError!") -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_exclude_parameter_from_update(): with fileinput.input("poly.ert", inplace=True) as fin: @@ -864,7 +854,6 @@ def mocked__init__(*args, **kwargs) -> None: assert "Foobar error" in captured.err -@pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case") def test_that_log_is_cleaned_up_from_repeated_forward_model_steps(caplog): """Verify that the run model now gereneates a cleanup log when diff --git a/tests/ui_tests/cli/test_field_parameter.py b/tests/ui_tests/cli/test_field_parameter.py index 64adebe7d5a..48532940ea4 100644 --- a/tests/ui_tests/cli/test_field_parameter.py +++ b/tests/ui_tests/cli/test_field_parameter.py @@ -5,7 +5,6 @@ import numpy as np import numpy.testing -import pytest import xtgeo from ert.config import ErtConfig @@ -15,7 +14,6 @@ from .run_cli import run_cli -@pytest.mark.integration_test def test_field_param_update(tmpdir): """ This replicates the poly example, only it uses FIELD parameter @@ -138,7 +136,6 @@ def test_field_param_update(tmpdir): ) -@pytest.mark.integration_test def test_parameter_update_with_inactive_cells_xtgeo_grdecl(tmpdir): """ This replicates the poly example, only it uses FIELD parameter diff --git a/tests/ui_tests/cli/test_local_driver.py b/tests/ui_tests/cli/test_local_driver.py index eb41e22d2e3..27726995f1a 100644 --- a/tests/ui_tests/cli/test_local_driver.py +++ b/tests/ui_tests/cli/test_local_driver.py @@ -4,8 +4,6 @@ from pathlib import Path from textwrap import dedent -import pytest - def create_ert_config(path: Path): ert_config_path = Path(path / "ert_config.ert") @@ -38,7 +36,6 @@ def create_ert_config(path: Path): ) -@pytest.mark.integration_test async def test_subprocesses_live_on_after_ert_dies(tmp_path): # Have ERT run a forward model that writes in PID to a file, then 
sleeps # Forcefully terminate ERT and assert that the child process is not terminated diff --git a/tests/ui_tests/cli/test_parameter_sample_types.py b/tests/ui_tests/cli/test_parameter_sample_types.py index 2de7cb6a247..4f6bd04cd16 100644 --- a/tests/ui_tests/cli/test_parameter_sample_types.py +++ b/tests/ui_tests/cli/test_parameter_sample_types.py @@ -22,7 +22,6 @@ def load_from_forward_model(ert_config, ensemble): @pytest.mark.usefixtures("set_site_config") -@pytest.mark.integration_test def test_surface_param_update(tmpdir): """Full update with a surface parameter, it mirrors the poly example, except it uses SURFACE instead of GEN_KW. @@ -173,7 +172,6 @@ def test_surface_param_update(tmpdir): assert not (surf.values == surf2.values).any() -@pytest.mark.integration_test @pytest.mark.limit_memory("130 MB") @pytest.mark.flaky(reruns=5) def test_field_param_memory(tmpdir): diff --git a/tests/ui_tests/cli/test_shell.py b/tests/ui_tests/cli/test_shell.py index e30418ada89..857dc2c87aa 100644 --- a/tests/ui_tests/cli/test_shell.py +++ b/tests/ui_tests/cli/test_shell.py @@ -1,14 +1,11 @@ import os import os.path -import pytest - from ert.plugins import ErtPluginManager from .run_cli import run_cli_with_pm -@pytest.mark.integration_test def test_shell_scripts_integration(tmpdir): """ The following test is a regression test that From 415a3158c4e55220b27ef903c0ed24b1efb24056 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 20:55:56 +0200 Subject: [PATCH 07/11] Avoid polluting .slurm_submit* files from test --- tests/unit_tests/scheduler/test_slurm_driver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit_tests/scheduler/test_slurm_driver.py b/tests/unit_tests/scheduler/test_slurm_driver.py index bf920a0e67c..5b8dc967617 100644 --- a/tests/unit_tests/scheduler/test_slurm_driver.py +++ b/tests/unit_tests/scheduler/test_slurm_driver.py @@ -38,6 +38,7 @@ def capturing_sbatch(monkeypatch, tmp_path): sbatch_path.chmod(sbatch_path.stat().st_mode | stat.S_IEXEC) +@pytest.mark.usefixtures("use_tmpdir") @pytest.mark.parametrize( "sbatch_script, scontrol_script, exit_code", [ From 8282958fb0257ce259b32964ef46b81dc8d2ffc9 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Fri, 13 Sep 2024 21:00:21 +0200 Subject: [PATCH 08/11] Add group to openpbs tests --- tests/unit_tests/scheduler/test_openpbs_driver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit_tests/scheduler/test_openpbs_driver.py b/tests/unit_tests/scheduler/test_openpbs_driver.py index 82dc0b81046..31e7e3f32cc 100644 --- a/tests/unit_tests/scheduler/test_openpbs_driver.py +++ b/tests/unit_tests/scheduler/test_openpbs_driver.py @@ -37,6 +37,8 @@ from .conftest import mock_bin +pytestmark = pytest.mark.xdist_group("openpbs") + @given(st.lists(st.sampled_from(JOB_STATES))) async def test_events_produced_from_jobstate_updates(jobstate_sequence: List[str]): From 83d7fc63fdfbfa7837adb7337675038e46ca2e5d Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Sat, 14 Sep 2024 14:39:29 +0200 Subject: [PATCH 09/11] Make flaky tracking test a ui test --- tests/ui_tests/cli/test_cli.py | 52 +++++++++++++++ tests/unit_tests/test_tracking.py | 102 ------------------------------ 2 files changed, 52 insertions(+), 102 deletions(-) diff --git a/tests/ui_tests/cli/test_cli.py b/tests/ui_tests/cli/test_cli.py index f57dcc3146f..adcea40b353 100644 --- a/tests/ui_tests/cli/test_cli.py +++ b/tests/ui_tests/cli/test_cli.py @@ -3,6 +3,7 @@ import logging import os import threading +from datetime import datetime from pathlib 
import Path from textwrap import dedent from unittest.mock import Mock, call @@ -11,6 +12,7 @@ import pandas as pd import pytest import xtgeo +from resdata.summary import Summary import _ert.threading import ert.shared @@ -877,3 +879,53 @@ def test_that_log_is_cleaned_up_from_repeated_forward_model_steps(caplog): "0-4", ) assert len([msg for msg in caplog.messages if expected_msg in msg]) == 1 + + +def run_sim(start_date): + """ + Create a summary file, the contents of which are not important + """ + summary = Summary.writer("ECLIPSE_CASE", start_date, 3, 3, 3) + summary.add_variable("FOPR", unit="SM3/DAY") + t_step = summary.add_t_step(1, sim_days=1) + t_step["FOPR"] = 1 + summary.fwrite() + + +def test_tracking_missing_ecl(monkeypatch, tmp_path, caplog): + config_file = tmp_path / "config.ert" + monkeypatch.chdir(tmp_path) + config_file.write_text( + dedent( + """ + NUM_REALIZATIONS 2 + + ECLBASE ECLIPSE_CASE + SUMMARY * + MAX_SUBMIT 1 -- will fail first and every time + REFCASE ECLIPSE_CASE + + """ + ) + ) + # We create a reference case, but there will be no response + run_sim(datetime(2014, 9, 10)) + with pytest.raises(ErtCliError): + run_cli( + TEST_RUN_MODE, + str(config_file), + ) + assert ( + f"Realization: 0 failed after reaching max submit (1):\n\t\n" + "status from done callback: " + "Could not find any unified " + f"summary file matching case path " + f"{Path().absolute()}/simulations/realization-0/" + "iter-0/ECLIPSE_CASE" + ) in caplog.messages + + case = f"{Path().absolute()}/simulations/realization-0/iter-0/ECLIPSE_CASE" + assert ( + f"Expected file {case}.UNSMRY not created by forward model!\nExpected " + f"file {case}.SMSPEC not created by forward model!" + ) in caplog.messages diff --git a/tests/unit_tests/test_tracking.py b/tests/unit_tests/test_tracking.py index 4880b5cf564..5aaaec5b715 100644 --- a/tests/unit_tests/test_tracking.py +++ b/tests/unit_tests/test_tracking.py @@ -1,17 +1,13 @@ import fileinput import json -import logging import os import re from argparse import ArgumentParser -from datetime import datetime from pathlib import Path -from textwrap import dedent from typing import Dict import pytest from jsonpath_ng import parse -from resdata.summary import Summary from _ert.threading import ErtThread from ert.__main__ import ert_parser @@ -389,101 +385,3 @@ def test_run_information_present_as_env_var_in_fm_context( assert key in jobs_data["global_environment"] if key == "_ERT_SIMULATION_MODE": assert jobs_data["global_environment"][key] == mode - - -def run_sim(start_date): - """ - Create a summary file, the contents of which are not important - """ - summary = Summary.writer("ECLIPSE_CASE", start_date, 3, 3, 3) - summary.add_variable("FOPR", unit="SM3/DAY") - t_step = summary.add_t_step(1, sim_days=1) - t_step["FOPR"] = 1 - summary.fwrite() - - -@pytest.mark.integration_test -def test_tracking_missing_ecl(tmpdir, caplog, storage): - with tmpdir.as_cwd(): - config = dedent( - """ - NUM_REALIZATIONS 2 - - ECLBASE ECLIPSE_CASE - SUMMARY * - MAX_SUBMIT 1 -- will fail first and every time - REFCASE ECLIPSE_CASE - - """ - ) - with open("config.ert", "w", encoding="utf-8") as fh: - fh.writelines(config) - # We create a reference case, but there will be no response - run_sim(datetime(2014, 9, 10)) - parser = ArgumentParser(prog="test_main") - parsed = ert_parser( - parser, - [ - TEST_RUN_MODE, - "config.ert", - ], - ) - - ert_config = ErtConfig.from_file(parsed.config) - os.chdir(ert_config.config_path) - events = Events() - model = create_model( - ert_config, - 
storage, - parsed, - events, - ) - - evaluator_server_config = EvaluatorServerConfig( - custom_port_range=range(1024, 65535), - custom_host="127.0.0.1", - use_token=False, - generate_cert=False, - ) - - thread = ErtThread( - name="ert_cli_simulation_thread", - target=model.start_simulations_thread, - args=(evaluator_server_config,), - ) - with caplog.at_level(logging.ERROR): - thread.start() - thread.join() - failures = [] - - for event in events: - if isinstance(event, EndEvent): - failures.append(event) - assert ( - f"Realization: 0 failed after reaching max submit (1):\n\t\n" - "status from done callback: " - "Could not find any unified " - f"summary file matching case path " - f"{Path().absolute()}/simulations/realization-0/" - "iter-0/ECLIPSE_CASE" - ) in caplog.messages - - # Just also check that it failed for the expected reason - assert len(failures) == 1 - assert ( - f"Realization: 0 failed after reaching max submit (1):\n\t\n" - "status from done callback: " - "Could not find any unified " - f"summary file matching case path " - f"{Path().absolute()}/simulations/realization-0/" - "iter-0/ECLIPSE_CASE" - ) in failures[0].msg - case = f"{Path().absolute()}/simulations/realization-0/iter-0/ECLIPSE_CASE" - assert ( - f"Expected file {case}.UNSMRY not created by forward model!\nExpected " - f"file {case}.SMSPEC not created by forward model!" - ) in caplog.messages - assert ( - f"Expected file {case}.UNSMRY not created by forward model!\nExpected " - f"file {case}.SMSPEC not created by forward model!" - ) in failures[0].msg From 46198c67a5f909070d7be39aaae8441f068e6fc3 Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Sat, 14 Sep 2024 16:51:03 +0200 Subject: [PATCH 10/11] Separate gui and cli tests for speed --- .github/workflows/build_and_test.yml | 6 +++--- .github/workflows/test_ert.yml | 10 +++++++--- codecov.yml | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index d0cf52f6760..22d162d9332 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -33,7 +33,7 @@ jobs: strategy: fail-fast: false matrix: - test-type: [ 'performance-tests', 'unit-tests', 'ui-tests' ] + test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ] python-version: [ '3.8', '3.11', '3.12' ] os: [ ubuntu-latest ] uses: ./.github/workflows/test_ert.yml @@ -58,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - test-type: [ 'performance-tests', 'unit-tests', 'ui-tests' ] + test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ] python-version: [ '3.8', '3.12' ] os: [ 'macos-13', 'macos-14', 'macos-14-large'] exclude: @@ -80,7 +80,7 @@ jobs: strategy: fail-fast: false matrix: - test-type: [ 'performance-tests', 'unit-tests', 'ui-tests' ] + test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ] python-version: [ '3.12' ] os: [ 'macos-latest' ] uses: ./.github/workflows/test_ert.yml diff --git a/.github/workflows/test_ert.yml b/.github/workflows/test_ert.yml index 2f74997ed39..d7b9ee82154 100644 --- a/.github/workflows/test_ert.yml +++ b/.github/workflows/test_ert.yml @@ -42,11 +42,15 @@ jobs: run: | uv pip install ".[dev]" - - name: UI Test - if: inputs.test-type == 'ui-tests' + - name: GUI Test + if: inputs.test-type == 'gui-tests' run: | pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -v --mpl --benchmark-disable tests/ui_tests/gui - pytest --cov=ert --cov-report=xml:cov2.xml --junit-xml=junit.xml -n 
logical -v --benchmark-disable --dist loadgroup tests/ui_tests/cli + + - name: CLI Test + if: inputs.test-type == 'cli-tests' + run: | + pytest --cov=ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -n logical -v --benchmark-disable --dist loadgroup tests/ui_tests/cli - name: Unit Test if: inputs.test-type == 'unit-tests' diff --git a/codecov.yml b/codecov.yml index c2f624c8dfe..8285bbe3227 100644 --- a/codecov.yml +++ b/codecov.yml @@ -4,4 +4,4 @@ fixes: comment: # The code coverage is made up of 4 test runs so only after all coverage # reports have been uploaded will the comparison be sane - after_n_builds: 12 + after_n_builds: 16 From 8176bc197b0319f95a40b33ed656995d0245d2d7 Mon Sep 17 00:00:00 2001 From: larsevj Date: Fri, 4 Oct 2024 12:31:53 +0200 Subject: [PATCH 11/11] Debug mac intel --- .github/workflows/build_and_test.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 22d162d9332..052bb806b3a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.12' ] uses: ./.github/workflows/build-wheels.yml with: @@ -34,7 +34,7 @@ jobs: fail-fast: false matrix: test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ] - python-version: [ '3.8', '3.11', '3.12' ] + python-version: [ '3.12' ] os: [ ubuntu-latest ] uses: ./.github/workflows/test_ert.yml with: @@ -47,7 +47,7 @@ jobs: fail-fast: false matrix: os: [ ubuntu-latest ] - python-version: [ '3.8', '3.11', '3.12' ] + python-version: [ '3.12' ] uses: ./.github/workflows/test_ert_with_slurm.yml with: os: ${{ matrix.os }} @@ -81,8 +81,8 @@ jobs: fail-fast: false matrix: test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ] - python-version: [ '3.12' ] - os: [ 'macos-latest' ] + python-version: [ '3.8', '3.11', '3.12' ] + os: [ 'macos-14-large' ] uses: ./.github/workflows/test_ert.yml with: os: ${{ matrix.os }}