Make work with export
yngve-sk committed Dec 20, 2024
1 parent 4d32b9e commit c694c16
Showing 7 changed files with 443 additions and 315 deletions.
6 changes: 3 additions & 3 deletions src/ert/run_models/everest_run_model.py
@@ -21,13 +21,13 @@
)

import numpy as np
from seba_sqlite import SqliteStorage, sqlite_storage
from numpy import float64
from numpy._typing import NDArray
from ropt.enums import EventType, OptimizerExitCode
from ropt.evaluator import EvaluatorContext, EvaluatorResult
from ropt.plan import BasicOptimizer
from ropt.plan import Event as OptimizerEvent
from seba_sqlite import SqliteStorage, sqlite_storage
from typing_extensions import TypedDict

from _ert.events import EESnapshot, EESnapshotUpdate, Event
@@ -130,7 +130,7 @@ def __call__(self) -> str | None: ...


@dataclass
class OptimalResult:
class OptimalResult: # noqa
batch: int
controls: list[Any]
total_objective: float
@@ -297,7 +297,7 @@ def run_experiment(

# Seems ROPT batches are 1-indexed now,
# whereas seba has its own 0-indexed counter.
assert self._result == optimal_result_from_everstorage
assert self._result.__dict__ == optimal_result_from_everstorage.__dict__

self._exit_code = (
"max_batch_num_reached"
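For context on the changed assertion above, which compares __dict__ attributes rather than the objects directly: dataclass equality only holds between instances of the same class, so two result objects of different types with identical fields compare unequal under ==, while their attribute dicts still match. A minimal standalone sketch (class names are illustrative, not from the codebase):

from dataclasses import dataclass

@dataclass
class ResultA:
    batch: int
    total_objective: float

@dataclass
class ResultB:
    batch: int
    total_objective: float

a, b = ResultA(1, 2.0), ResultB(1, 2.0)
assert a != b                       # dataclass __eq__ requires the same class
assert a.__dict__ == b.__dict__     # field-wise comparison across types still holds
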
227 changes: 87 additions & 140 deletions src/everest/api/everest_data_api.py
@@ -1,8 +1,9 @@
from collections import OrderedDict
from pathlib import Path

import polars
import polars as pl
from seba_sqlite.snapshot import SebaSnapshot
from ropt.enums import ConstraintType
from seba_sqlite import SebaSnapshot

from ert.storage import open_storage
from everest.config import EverestConfig, ServerConfig
@@ -20,42 +21,29 @@ def __init__(self, config: EverestConfig, filter_out_gradient=True):

@property
def batches(self):
batch_ids = list({opt.batch_id for opt in self._snapshot.optimization_data})
batch_ids2 = sorted(
return sorted(
b.batch_id
for b in self._ever_storage.data.batches
if b.batch_objectives is not None
)
assert batch_ids == batch_ids2
return sorted(batch_ids)

@property
def accepted_batches(self):
batch_ids = list(
{opt.batch_id for opt in self._snapshot.optimization_data if opt.merit_flag}
)
batch_ids2 = sorted(
return sorted(
b.batch_id for b in self._ever_storage.data.batches if b.is_improvement
)
assert batch_ids == batch_ids2

return sorted(batch_ids)

@property
def objective_function_names(self):
original = [fnc.name for fnc in self._snapshot.metadata.objectives.values()]
new = sorted(
return sorted(
self._ever_storage.data.objective_functions["objective_name"]
.unique()
.to_list()
)
assert original == new
return original

@property
def output_constraint_names(self):
original = [fnc.name for fnc in self._snapshot.metadata.constraints.values()]
new = (
return (
sorted(
self._ever_storage.data.nonlinear_constraints["constraint_name"]
.unique()
@@ -64,28 +52,16 @@ def output_constraint_names(self):
if self._ever_storage.data.nonlinear_constraints is not None
else []
)
assert original == new
return original

def input_constraint(self, control):
controls = [
con
for con in self._snapshot.metadata.controls.values()
if con.name == control
]

original = {"min": controls[0].min_value, "max": controls[0].max_value}

initial_values = self._ever_storage.data.initial_values
control_spec = initial_values.filter(
pl.col("control_name") == control
).to_dicts()[0]
new = {
return {
"min": control_spec.get("lower_bounds"),
"max": control_spec.get("upper_bounds"),
}
assert new == original
return original

def output_constraint(self, constraint):
"""
@@ -95,146 +71,122 @@ def output_constraint(self, constraint):
"right_hand_side" is a constant real number that indicates
the constraint bound/target.
"""
constraints = [
con
for con in self._snapshot.metadata.constraints.values()
if con.name == constraint
]

old = {
"type": constraints[0].constraint_type,
"right_hand_side": constraints[0].rhs_value,
}

constraint_dict = self._ever_storage.data.nonlinear_constraints.to_dicts()[0]
new = {
"type": constraint_dict["constraint_type"],
"right_hand_side": constraint_dict["rhs_value"],
return {
"type": ConstraintType(constraint_dict["constraint_type"]).name.lower(),
"right_hand_side": constraint_dict["constraint_rhs_value"],
}

assert old == new
return new
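# Illustrative aside (not part of this commit): the dict shape documented in the
# docstring above, with made-up values. In the new storage-backed lookup the
# "type" string comes from ConstraintType(constraint_type_value).name.lower();
# "le" below assumes such an enum member exists.
example_output_constraint = {
    "type": "le",                 # assumed lowercased ConstraintType name
    "right_hand_side": 1500.0,    # constant bound/target for the constraint
}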

@property
def realizations(self):
old = list(
OrderedDict.fromkeys(
int(sim.realization) for sim in self._snapshot.simulation_data
)
)
new = sorted(
return sorted(
self._ever_storage.data.batches[0]
.realization_objectives["realization"]
.unique()
.to_list()
)
assert old == new
return new

@property
def simulations(self):
old = list(
OrderedDict.fromkeys(
[int(sim.simulation) for sim in self._snapshot.simulation_data]
)
)

new = sorted(
return sorted(
self._ever_storage.data.batches[0]
.realization_objectives["result_id"]
.realization_objectives["simulation_id"]
.unique()
.to_list()
)
assert old == new
return new

@property
def control_names(self):
old = [con.name for con in self._snapshot.metadata.controls.values()]
new = sorted(
return sorted(
self._ever_storage.data.initial_values["control_name"].unique().to_list()
)
assert old == new
return new

@property
def control_values(self):
controls = [con.name for con in self._snapshot.metadata.controls.values()]
return [
{"control": con, "batch": sim.batch, "value": sim.controls[con]}
for sim in self._snapshot.simulation_data
for con in controls
if con in sim.controls
]
all_control_names = self._ever_storage.data.initial_values[
"control_name"
].to_list()
new = []
for batch in self._ever_storage.data.batches:
if batch.realization_controls is None:
continue

for controls_dict in batch.realization_controls.to_dicts():
for name in all_control_names:
new.append(
{
"control": name,
"batch": batch.batch_id,
"value": controls_dict[name],
}
)

return new

@property
def objective_values(self):
old = [
{
"function": objective.name,
"batch": sim.batch,
"realization": sim.realization,
"simulation": sim.simulation,
"value": sim.objectives[objective.name],
"weight": objective.weight,
"norm": objective.normalization,
}
for sim in self._snapshot.simulation_data
for objective in self._snapshot.metadata.objectives.values()
if objective.name in sim.objectives
]

new = [
return [
b for b in self._ever_storage.data.batches if b.batch_objectives is not None
]

assert old == new

return old

@property
def single_objective_values(self):
single_obj = [
{
"batch": optimization_el.batch_id,
"objective": optimization_el.objective_value,
"accepted": optimization_el.merit_flag,
}
for optimization_el in self._snapshot.optimization_data
]
metadata = {
func.name: {"weight": func.weight, "norm": func.normalization}
for func in self._snapshot.metadata.functions.values()
if func.function_type == func.FUNCTION_OBJECTIVE_TYPE
}
if len(metadata) == 1:
return single_obj
objectives = []
for name, values in self._snapshot.expected_objectives.items():
for idx, val in enumerate(values):
factor = metadata[name]["weight"] * metadata[name]["norm"]
if len(objectives) > idx:
objectives[idx].update({name: val * factor})
else:
objectives.append({name: val * factor})
for idx, obj in enumerate(single_obj):
obj.update(objectives[idx])
batch_datas = polars.concat(
[
b.batch_objectives.select(
c for c in b.batch_objectives.columns if c != "merit_value"
).with_columns(
polars.lit(1 if b.is_improvement else 0).alias("accepted")
)
for b in self._ever_storage.data.batches
if b.realization_controls is not None
]
)
objectives = self._ever_storage.data.objective_functions

return single_obj
for o in objectives.to_dicts():
batch_datas = batch_datas.with_columns(
polars.col(o["objective_name"]) * o["weight"] * o["normalization"]
)

return (
batch_datas.rename(
{"total_objective_value": "objective", "batch_id": "batch"}
)
.select("batch", "objective", "accepted")
.to_dicts()
)
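# Illustrative aside (not part of this commit): a standalone sketch of the
# weighting step above, using a single toy batch_objectives frame instead of
# polars.concat over every batch. The objective name ("npv") and its weight and
# normalization are assumptions, not values from the diff.
import polars

toy_objectives = polars.DataFrame(
    {
        "batch_id": [0, 1],
        "npv": [10.0, 12.0],                     # assumed objective column
        "total_objective_value": [10.0, 12.0],
    }
).with_columns(polars.lit(1).alias("accepted"))

weight, normalization = 1.0, 1e-3                # assumed objective metadata
toy_objectives = toy_objectives.with_columns(
    polars.col("npv") * weight * normalization
)

print(
    toy_objectives.rename({"total_objective_value": "objective", "batch_id": "batch"})
    .select("batch", "objective", "accepted")
    .to_dicts()
)
# Example output: [{'batch': 0, 'objective': 10.0, 'accepted': 1}, ...]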

@property
def gradient_values(self):
return [
{
"batch": optimization_el.batch_id,
"function": function,
"control": control,
"value": value,
}
for optimization_el in self._snapshot.optimization_data
for function, info in optimization_el.gradient_info.items()
for control, value in info.items()
all_batch_data = [
b.batch_objective_gradient
for b in self._ever_storage.data.batches
if b.batch_objective_gradient is not None
]
if not all_batch_data:
return []

all_info = polars.concat(all_batch_data).drop("result_id")
objective_columns = [
c
for c in all_info.drop(["batch_id", "control_name"]).columns
if not c.endswith(".total")
]
return (
all_info.select("batch_id", "control_name", *objective_columns)
.unpivot(
on=objective_columns,
index=["batch_id", "control_name"],
variable_name="function",
value_name="value",
)
.rename({"control_name": "control", "batch_id": "batch"})
.sort(by=["batch", "control"])
.select(["batch", "function", "control", "value"])
.to_dicts()
)
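# Illustrative aside (not part of this commit): a minimal sketch of the
# wide-to-long unpivot used above; control and objective names are assumptions.
import polars

wide_gradients = polars.DataFrame(
    {
        "batch_id": [0, 0],
        "control_name": ["well_rate_1", "well_rate_2"],  # assumed control names
        "npv": [0.12, -0.03],                            # assumed objective column
    }
)

long_gradients = (
    wide_gradients.unpivot(
        on=["npv"],
        index=["batch_id", "control_name"],
        variable_name="function",
        value_name="value",
    )
    .rename({"control_name": "control", "batch_id": "batch"})
    .sort(by=["batch", "control"])
    .select(["batch", "function", "control", "value"])
)
print(long_gradients.to_dicts())
# Example output: [{'batch': 0, 'function': 'npv', 'control': 'well_rate_1', 'value': 0.12}, ...]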

def summary_values(self, batches=None, keys=None):
if batches is None:
Expand Down Expand Up @@ -265,13 +217,8 @@ def summary_values(self, batches=None, keys=None):
summary = summary.with_columns(
pl.Series("batch", [batch_id] * summary.shape[0])
)
# The realization ID as defined by Everest must be
# retrieved via the seba snapshot.
realization_map = {
sim.simulation: sim.realization
for sim in self._snapshot.simulation_data
if sim.batch == batch_id
}

realization_map = self._ever_storage.data.simulation_to_realization_map
realizations = pl.Series(
"realization",
[realization_map.get(str(sim)) for sim in summary["simulation"]],