From 4856609281d40a983d6ab93bb84c19b1fe795d42 Mon Sep 17 00:00:00 2001 From: Feda Curic Date: Mon, 11 Dec 2023 07:44:04 +0100 Subject: [PATCH 1/3] Add GitHub workflow for benchmarking --- .github/workflows/benchmark.yml | 49 ++++++ pyproject.toml | 1 + tests/unit_tests/analysis/test_es_update.py | 162 +++++++++++++++++++- 3 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/benchmark.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 00000000000..85b77edbced --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,49 @@ +name: Benchmark Adaptive Localization +on: + push: + branches: + - benchmark + +permissions: + # deployments permission to deploy GitHub pages website + deployments: write + # contents permission to update benchmark contents in gh-pages branch + contents: write + +jobs: + benchmark: + name: Run pytest-benchmark benchmark example + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: true + lfs: true + + - uses: actions/setup-python@v4 + id: setup_python + with: + python-version: "3.10" + cache: "pip" + cache-dependency-path: | + setup.py + pyproject.toml + + - name: Install ert with dev-deps + run: | + pip install ".[dev]" + + - name: Run benchmark + run: | + pytest tests/unit_tests/analysis/test_es_update.py::test_and_benchmark_adaptive_localization_with_fields --benchmark-json output.json + + - name: Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + name: Python Benchmark with pytest-benchmark + tool: 'pytest' + output-file-path: output.json + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true diff --git a/pyproject.toml b/pyproject.toml index bd8d65ab3e6..5dd8110170c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,6 +118,7 @@ dev = [ "sphinxcontrib-plantuml", "sphinxcontrib.datatemplates", "testpath", + "gstools", ] style = [ "cmake-format", diff --git a/tests/unit_tests/analysis/test_es_update.py b/tests/unit_tests/analysis/test_es_update.py index 3cf7a9679af..8fb08e662ec 100644 --- a/tests/unit_tests/analysis/test_es_update.py +++ b/tests/unit_tests/analysis/test_es_update.py @@ -5,6 +5,7 @@ import numpy as np import pytest import xarray as xr +import xtgeo from iterative_ensemble_smoother import SIES from ert import LibresFacade @@ -484,7 +485,6 @@ def test_that_surfaces_retain_their_order_when_loaded_and_saved_by_ert(copy_case (row-major / column-major) when working with surfaces. """ rng = np.random.default_rng() - import xtgeo from scipy.ndimage import gaussian_filter def sample_prior(nx, ny): @@ -608,6 +608,166 @@ def _load_parameters(source_ens, iens_active_index, param_groups): assert np.trace(np.cov(posterior[prior_name])) < np.trace(np.cov(prior_data)) +def test_and_benchmark_adaptive_localization_with_fields( + storage, tmp_path, monkeypatch, benchmark +): + from functools import partial + + import gstools as gs + import scipy as sp + + from ert.config import Field + from ert.field_utils import Shape + + monkeypatch.chdir(tmp_path) + + rng = np.random.default_rng(42) + + # Number of grid-cells in x and y direction + nx = 40 + # Dimensionality of the problem + num_parameters = nx * nx + num_observations = 50 + num_ensemble = 25 + + diagonal = np.ones(min(num_parameters, num_observations)) + + # Create a tridiagonal matrix (easiest with scipy) + A = sp.sparse.diags( + [diagonal, diagonal, diagonal], + offsets=[-1, 0, 1], + shape=(num_observations, num_parameters), + dtype=float, + ).toarray() + + # We add some noise that is insignificant compared to the + # actual local structure in the forward model + A = A + rng.standard_normal(size=A.shape) * 0.01 + + def g(X): + """Apply the forward model.""" + return A @ X + + model = gs.Exponential(dim=2, var=2, len_scale=8) + + fields = [] + seed = gs.random.MasterRNG(20170519) + for _ in range(num_ensemble): + srf = gs.SRF(model, seed=seed()) + field = srf.structured([np.arange(nx), np.arange(nx)]) + fields.append(field) + + X = np.vstack([field.flatten() for field in fields]).T + Y = g(X) + + # Create observations: obs = g(x) + N(0, 1) + x_true = np.linspace(-1, 1, num=num_parameters) + observation_noise = rng.standard_normal(size=num_observations) # N(0, 1) noise + observations = g(x_true) + observation_noise + + shape = Shape(nx, nx, 1) + grid = xtgeo.create_box_grid(dimension=(shape.nx, shape.ny, shape.nz)) + grid.to_file("MY_EGRID.EGRID", "egrid") + + resp = GenDataConfig(name="RESPONSE") + obs = xr.Dataset( + { + "observations": ( + ["report_step", "index"], + observations.reshape((1, num_observations)), + ), + "std": ( + ["report_step", "index"], + observation_noise.reshape(1, num_observations), + ), + }, + coords={"report_step": [0], "index": np.arange(len(observations))}, + attrs={"response": "RESPONSE"}, + ) + + param_group = "PARAM_FIELD" + update_config = UpdateConfiguration( + update_steps=[ + UpdateStep( + name="ALL_ACTIVE", + observations=["OBSERVATION"], + parameters=[param_group], + ) + ] + ) + + config = Field.from_config_list( + "MY_EGRID.EGRID", + shape, + [ + param_group, + param_group, + "param.GRDECL", + "INIT_FILES:param_%d.GRDECL", + "FORWARD_INIT:False", + ], + ) + + experiment = storage.create_experiment( + parameters=[config], + responses=[resp], + observations={"OBSERVATION": obs}, + ) + + prior = storage.create_ensemble( + experiment, + ensemble_size=num_ensemble, + iteration=0, + name="prior", + ) + + for iens in range(prior.ensemble_size): + prior.state_map[iens] = RealizationStorageState.HAS_DATA + prior.save_parameters( + param_group, + iens, + xr.Dataset( + { + "values": xr.DataArray(fields[iens], dims=("x", "y")), + } + ), + ) + + prior.save_response( + "RESPONSE", + xr.Dataset( + {"values": (["report_step", "index"], [Y[:, iens]])}, + coords={"index": range(len(Y[:, iens])), "report_step": [0]}, + ), + iens, + ) + + posterior_ens = storage.create_ensemble( + prior.experiment_id, + ensemble_size=prior.ensemble_size, + iteration=1, + name="posterior", + prior_ensemble=prior, + ) + + smoother_update_run = partial( + smoother_update, + prior, + posterior_ens, + "id", + update_config, + UpdateSettings(), + ESSettings(localization=True), + ) + benchmark(smoother_update_run) + + prior_da = prior.load_parameters(param_group, range(num_ensemble)) + posterior_da = posterior_ens.load_parameters(param_group, range(num_ensemble)) + # Because of adaptive localization, not all parameters should be updated. + # This would fail if with global updates. + assert np.isclose(prior_da, posterior_da).sum() > 0 + + @pytest.mark.integration_test def test_gen_data_obs_data_mismatch(storage, uniform_parameter, update_config): resp = GenDataConfig(name="RESPONSE") From 72c99b83fe2c428b3caaee66e0c95ae77db74e17 Mon Sep 17 00:00:00 2001 From: Feda Curic Date: Mon, 11 Dec 2023 08:12:59 +0100 Subject: [PATCH 2/3] Add comments --- tests/unit_tests/analysis/test_es_update.py | 41 +++++++++------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/tests/unit_tests/analysis/test_es_update.py b/tests/unit_tests/analysis/test_es_update.py index 8fb08e662ec..08aa694c6da 100644 --- a/tests/unit_tests/analysis/test_es_update.py +++ b/tests/unit_tests/analysis/test_es_update.py @@ -1,9 +1,12 @@ import re from argparse import ArgumentParser +from functools import partial from pathlib import Path +import gstools as gs import numpy as np import pytest +import scipy as sp import xarray as xr import xtgeo from iterative_ensemble_smoother import SIES @@ -25,8 +28,9 @@ from ert.analysis.row_scaling import RowScaling from ert.cli import ENSEMBLE_SMOOTHER_MODE from ert.cli.main import run_cli -from ert.config import AnalysisConfig, ErtConfig, GenDataConfig, GenKwConfig +from ert.config import AnalysisConfig, ErtConfig, Field, GenDataConfig, GenKwConfig from ert.config.analysis_module import ESSettings, IESSettings +from ert.field_utils import Shape from ert.storage import open_storage from ert.storage.realization_storage_state import RealizationStorageState @@ -611,28 +615,18 @@ def _load_parameters(source_ens, iens_active_index, param_groups): def test_and_benchmark_adaptive_localization_with_fields( storage, tmp_path, monkeypatch, benchmark ): - from functools import partial - - import gstools as gs - import scipy as sp - - from ert.config import Field - from ert.field_utils import Shape - monkeypatch.chdir(tmp_path) rng = np.random.default_rng(42) - # Number of grid-cells in x and y direction - nx = 40 - # Dimensionality of the problem - num_parameters = nx * nx + num_grid_cells = 40 + num_parameters = num_grid_cells * num_grid_cells num_observations = 50 num_ensemble = 25 + # Create a tridiagonal matrix that maps responses to parameters. + # Being tridiagonal, it ensures that each response is influenced only by its neighboring parameters. diagonal = np.ones(min(num_parameters, num_observations)) - - # Create a tridiagonal matrix (easiest with scipy) A = sp.sparse.diags( [diagonal, diagonal, diagonal], offsets=[-1, 0, 1], @@ -648,24 +642,25 @@ def g(X): """Apply the forward model.""" return A @ X + # Initialize an ensemble representing the prior distribution of parameters using spatial random fields. model = gs.Exponential(dim=2, var=2, len_scale=8) - fields = [] seed = gs.random.MasterRNG(20170519) for _ in range(num_ensemble): srf = gs.SRF(model, seed=seed()) - field = srf.structured([np.arange(nx), np.arange(nx)]) + field = srf.structured([np.arange(num_grid_cells), np.arange(num_grid_cells)]) fields.append(field) - X = np.vstack([field.flatten() for field in fields]).T + Y = g(X) - # Create observations: obs = g(x) + N(0, 1) - x_true = np.linspace(-1, 1, num=num_parameters) - observation_noise = rng.standard_normal(size=num_observations) # N(0, 1) noise - observations = g(x_true) + observation_noise + # Create observations by adding noise to a realization. + observation_noise = rng.standard_normal(size=num_observations) + observations = Y[:, 0] + observation_noise - shape = Shape(nx, nx, 1) + # Create necessary files and data sets to be able to update + # the parameters using the ensemble smoother. + shape = Shape(num_grid_cells, num_grid_cells, 1) grid = xtgeo.create_box_grid(dimension=(shape.nx, shape.ny, shape.nz)) grid.to_file("MY_EGRID.EGRID", "egrid") From 04b2d8e99a1e56280908a7fd2eca2fcc5fc02b15 Mon Sep 17 00:00:00 2001 From: Feda Curic Date: Tue, 12 Dec 2023 09:32:03 +0100 Subject: [PATCH 3/3] Point benchmark action to main --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 85b77edbced..322812463a3 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -2,7 +2,7 @@ name: Benchmark Adaptive Localization on: push: branches: - - benchmark + - main permissions: # deployments permission to deploy GitHub pages website