From 6e7d720025f7175b8b3d12eab5494fb183e40eee Mon Sep 17 00:00:00 2001 From: Eivind Jahren Date: Mon, 4 Sep 2023 15:43:10 +0200 Subject: [PATCH] Go back to c implementation of summary loading This removes the following unwanted behaviors of the current implementation: * The call to _init_numpy_vector_interp changes some values, probably due to interpolation. * Iterating over ecl_sum uses both ecl_smspec_node->key1 and ecl_smspec_node->key2 which will result in duplicate entries for summary keyword types that have both keys. * DATE is included in the values loaded. However, the following behavior is kept from the most recent implementation: * All report steps are loaded, not just those in the time map. --- src/clib/lib/CMakeLists.txt | 1 + src/clib/lib/enkf/read_summary.cpp | 50 +++++++++++++++++++ src/ert/config/summary_config.py | 28 +++-------- .../0/summary_collector_1.csv | 10 ++-- tests/unit_tests/test_libres_facade.py | 2 +- 5 files changed, 64 insertions(+), 27 deletions(-) create mode 100644 src/clib/lib/enkf/read_summary.cpp diff --git a/src/clib/lib/CMakeLists.txt b/src/clib/lib/CMakeLists.txt index 88c02ae074a..704ed850993 100644 --- a/src/clib/lib/CMakeLists.txt +++ b/src/clib/lib/CMakeLists.txt @@ -14,6 +14,7 @@ pybind11_add_module( job_queue/torque_driver.cpp job_queue/spawn.cpp enkf/enkf_obs.cpp + enkf/read_summary.cpp enkf/row_scaling.cpp) # ----------------------------------------------------------------- diff --git a/src/clib/lib/enkf/read_summary.cpp b/src/clib/lib/enkf/read_summary.cpp new file mode 100644 index 00000000000..9cbdf018c3f --- /dev/null +++ b/src/clib/lib/enkf/read_summary.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include + +static bool matches(std::vector patterns, std::string key) { + bool has_key = false; + for (auto pattern : patterns) { + if (fnmatch(pattern.c_str(), key.c_str(), 0) == 0) { + has_key = true; + break; + } + } + return has_key; +} +ERT_CLIB_SUBMODULE("_read_summary", m) { 
+ m.def("read_summary", + [](Cwrap summary, std::vector keys) { + const int step2 = ecl_sum_get_last_report_step(summary); + const ecl_smspec_type *smspec = ecl_sum_get_smspec(summary); + std::vector>> + summary_vectors{}; + + for (int i = 0; i < ecl_smspec_num_nodes(smspec); i++) { + const ecl::smspec_node &smspec_node = + ecl_smspec_iget_node_w_node_index(smspec, i); + const char *key = smspec_node.get_gen_key1(); + if (matches(keys, key)) { + int start = ecl_sum_get_first_report_step(summary); + int end = ecl_sum_get_last_report_step(summary); + std::vector data{}; + int key_index = + ecl_sum_get_general_var_params_index(summary, key); + for (int tstep = start; tstep <= end; tstep++) { + if (ecl_sum_has_report_step(summary, tstep)) { + int time_index = + ecl_sum_iget_report_end(summary, tstep); + data.push_back( + ecl_sum_iget(summary, time_index, key_index)); + } + } + summary_vectors.emplace_back(key, data); + } + } + return summary_vectors; + }); +} diff --git a/src/ert/config/summary_config.py b/src/ert/config/summary_config.py index c2687b371ee..f99f72f7a9e 100644 --- a/src/ert/config/summary_config.py +++ b/src/ert/config/summary_config.py @@ -1,20 +1,19 @@ from __future__ import annotations -import ctypes import logging from dataclasses import dataclass from datetime import datetime -from fnmatch import fnmatch from typing import TYPE_CHECKING, Set -import numpy as np import xarray as xr from ecl.summary import EclSum +from ert._clib._read_summary import read_summary # pylint: disable=import-error + from .response_config import ResponseConfig if TYPE_CHECKING: - from typing import Any, List, Optional + from typing import List, Optional logger = logging.getLogger(__name__) @@ -40,7 +39,6 @@ def read_from_file(self, run_path: str, iens: int) -> xr.Dataset: f"file from: {run_path}/{filename}.UNSMRY", ) from e - data = [] c_time = summary.alloc_time_vector(True) time_map = [t.datetime() for t in c_time] if self.refcase: @@ -54,24 +52,12 @@ def 
read_from_file(self, run_path: str, iens: int) -> xr.Dataset: f"{last} from: {run_path}/{filename}.UNSMRY" ) - user_summary_keys = set(self.keys) - keys = sorted(list(iter(summary))) - for key in keys: - if not self._should_load_summary_key(key, user_summary_keys): - continue - - np_vector = np.zeros(len(time_map)) - summary._init_numpy_vector_interp( - key, - c_time, - np_vector.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - ) - data.append(np_vector) + summary_data = read_summary(summary, self.keys) + summary_data.sort(key=lambda x: x[0]) + data = [d for _, d in summary_data] + keys = [k for k, _ in summary_data] return xr.Dataset( {"values": (["name", "time"], data)}, coords={"time": time_map, "name": keys}, ) - - def _should_load_summary_key(self, data_key: Any, user_set_keys: set[str]) -> bool: - return any(fnmatch(data_key, key) for key in user_set_keys) diff --git a/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv b/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv index ba6fdb0ece7..14ef7548276 100644 --- a/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv +++ b/tests/unit_tests/snapshots/test_libres_facade/test_summary_collector/0/summary_collector_1.csv @@ -1,5 +1,5 @@ -Realization,Date,"BPR:1,3,8",BPR:445,"BPR:5,5,5",BPR:721,FGIP,FGIPH,FGOR,FGORH,FGPR,FGPRH,FGPT,FGPTH,FOIP,FOIPH,FOPR,FOPRH,FOPT,FOPTH,FWCT,FWCTH,FWIP,FWIPH,FWPR,FWPRH,FWPT,FWPTH,WGOR:OP1,WGOR:OP2,WGORH:OP1,WGORH:OP2,WGPR:OP1,WGPR:OP2,WGPRH:OP1,WGPRH:OP2,WOPR:OP1,WOPR:OP2,WOPRH:OP1,WOPRH:OP2,WWCT:OP1,WWCT:OP2,WWCTH:OP1,WWCTH:OP2,WWPR:OP1,WWPR:OP2,WWPRH:OP1,WWPRH:OP2 
-0,2010-01-10,0.9996,0.9996,0.9996,0.9996,2499.4473,2499.9956,1.0,1.0,0.0557,0.0012,0.5528,0.0044,1999.4462,1999.994,0.056,0.0017,0.5538,0.0059,0.1776,0.0002,2249.4492,2249.9998,0.0551,0.0,0.5507,0.0001,1.0,1.0,1.0,1.0,0.0557,0.0,0.0006,0.0006,0.056,0.0,0.0008,0.0008,0.3552,0.0,0.0001,0.0002,0.0551,0.0,0.0,0.0 -1,2010-01-10,0.9996,0.9996,0.9996,0.9996,2499.8467,2499.9956,1.0,1.0,0.0157,0.0012,0.1533,0.0044,1999.8458,1999.994,0.016,0.0017,0.1542,0.0059,0.0657,0.0002,2249.8489,2249.9998,0.0151,0.0,0.1512,0.0001,1.0,1.0,1.0,1.0,0.0,0.0157,0.0006,0.0006,0.0,0.016,0.0008,0.0008,0.0,0.1314,0.0001,0.0002,0.0,0.0151,0.0,0.0 -2,2010-01-10,0.9996,0.9996,0.9996,0.9996,2500.0,2499.9956,1.0,1.0,0.0,0.0012,0.0,0.0044,2000.0,1999.994,0.0,0.0017,0.0,0.0059,0.0,0.0002,2250.0,2249.9998,0.0,0.0,0.0,0.0001,1.0,1.0,1.0,1.0,0.0,0.0,0.0006,0.0006,0.0,0.0,0.0008,0.0008,0.0,0.0,0.0001,0.0002,0.0,0.0,0.0,0.0 -3,2010-01-10,0.9996,0.9996,0.9996,0.9996,2497.1733,2499.9956,0.9994,1.0,0.2835,0.0012,2.8267,0.0044,1997.1715,1999.994,0.284,0.0017,2.8285,0.0059,0.4825,0.0002,2247.1775,2249.9998,0.2823,0.0,2.8224,0.0001,1.0,0.9987,1.0,1.0,0.0879,0.1956,0.0006,0.0006,0.0882,0.1958,0.0008,0.0008,0.4661,0.4989,0.0001,0.0002,0.0873,0.195,0.0,0.0 +Realization,Date,"BPR:1,3,8","BPR:5,5,5",FGIP,FGIPH,FGOR,FGORH,FGPR,FGPRH,FGPT,FGPTH,FOIP,FOIPH,FOPR,FOPRH,FOPT,FOPTH,FWCT,FWCTH,FWIP,FWIPH,FWPR,FWPRH,FWPT,FWPTH,WGOR:OP1,WGOR:OP2,WGORH:OP1,WGORH:OP2,WGPR:OP1,WGPR:OP2,WGPRH:OP1,WGPRH:OP2,WOPR:OP1,WOPR:OP2,WOPRH:OP1,WOPRH:OP2,WWCT:OP1,WWCT:OP2,WWCTH:OP1,WWCTH:OP2,WWPR:OP1,WWPR:OP2,WWPRH:OP1,WWPRH:OP2 +0,2010-01-10,0.9996,0.9996,2499.4473,2499.9956,1.0,1.0,0.0557,0.0012,0.5528,0.0044,1999.4462,1999.994,0.056,0.0017,0.5538,0.0059,0.1776,0.0002,2249.4492,2249.9998,0.0551,0.0,0.5507,0.0001,1.0,1.0,1.0,1.0,0.0557,0.0,0.0006,0.0006,0.056,0.0,0.0008,0.0008,0.3552,0.0,0.0001,0.0002,0.0551,0.0,0.0,0.0 
+1,2010-01-10,0.9996,0.9996,2499.8467,2499.9956,1.0,1.0,0.0157,0.0012,0.1533,0.0044,1999.8458,1999.994,0.016,0.0017,0.1542,0.0059,0.0657,0.0002,2249.8489,2249.9998,0.0151,0.0,0.1512,0.0001,1.0,1.0,1.0,1.0,0.0,0.0157,0.0006,0.0006,0.0,0.016,0.0008,0.0008,0.0,0.1314,0.0001,0.0002,0.0,0.0151,0.0,0.0 +2,2010-01-10,0.9996,0.9996,2500.0,2499.9956,1.0,1.0,0.0,0.0012,0.0,0.0044,2000.0,1999.994,0.0,0.0017,0.0,0.0059,0.0,0.0002,2250.0,2249.9998,0.0,0.0,0.0,0.0001,1.0,1.0,1.0,1.0,0.0,0.0,0.0006,0.0006,0.0,0.0,0.0008,0.0008,0.0,0.0,0.0001,0.0002,0.0,0.0,0.0,0.0 +3,2010-01-10,0.9996,0.9996,2497.1733,2499.9956,0.9994,1.0,0.2835,0.0012,2.8267,0.0044,1997.1715,1999.994,0.284,0.0017,2.8285,0.0059,0.4825,0.0002,2247.1775,2249.9998,0.2823,0.0,2.8224,0.0001,1.0,0.9987,1.0,1.0,0.0879,0.1956,0.0006,0.0006,0.0882,0.1958,0.0008,0.0008,0.4661,0.4989,0.0001,0.0002,0.0873,0.195,0.0,0.0 diff --git a/tests/unit_tests/test_libres_facade.py b/tests/unit_tests/test_libres_facade.py index f052a2a73e3..48e7f8eb405 100644 --- a/tests/unit_tests/test_libres_facade.py +++ b/tests/unit_tests/test_libres_facade.py @@ -263,7 +263,7 @@ def test_summary_collector( data.iloc[:4].round(4).to_csv(), "summary_collector_1.csv", ) - assert data.shape == (1000, 46) + assert data.shape == (1000, 44) with pytest.raises(KeyError): # realization 60: _ = data.loc[60]