From 18017c70deaef30b460c57a5f4d33abc63b27459 Mon Sep 17 00:00:00 2001 From: larsevj Date: Thu, 26 Sep 2024 15:08:23 +0200 Subject: [PATCH] Add support for design matrix keyword in ert config - Expects the format: DESIGN_MATRIX file.xlsx DESIGN_SHEET:design DEFAULT_SHEET:default where file.xlsx is an existing file. - Scaffolding for further support for reading parameter values from design matrix excel files. --- src/ert/config/analysis_config.py | 8 +++ src/ert/config/design_matrix.py | 52 ++++++++++++++++++ src/ert/config/parsing/config_keywords.py | 1 + src/ert/config/parsing/config_schema.py | 15 ++++++ .../unit_tests/config/test_analysis_config.py | 54 ++++++++++++++++++- 5 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 src/ert/config/design_matrix.py diff --git a/src/ert/config/analysis_config.py b/src/ert/config/analysis_config.py index 6c8fbc8aa56..a2afdf9ad6f 100644 --- a/src/ert/config/analysis_config.py +++ b/src/ert/config/analysis_config.py @@ -10,6 +10,7 @@ from pydantic import ValidationError from .analysis_module import ESSettings, IESSettings +from .design_matrix import DesignMatrix from .parsing import ( AnalysisMode, ConfigDict, @@ -40,6 +41,7 @@ class AnalysisConfig: ies_module: IESSettings = field(default_factory=IESSettings) observation_settings: UpdateSettings = field(default_factory=UpdateSettings) num_iterations: int = 1 + design_matrix_args: Optional[DesignMatrix] = None @no_type_check @classmethod @@ -78,6 +80,9 @@ def from_dict(cls, config_dict: ConfigDict) -> "AnalysisConfig": ) min_realization = min(min_realization, num_realization) + + design_matrix_config_list = config_dict.get(ConfigKeys.DESIGN_MATRIX, None) + options: Dict[str, Dict[str, Any]] = {"STD_ENKF": {}, "IES_ENKF": {}} observation_settings: Dict[str, Any] = { "alpha": config_dict.get(ConfigKeys.ENKF_ALPHA, 3.0), @@ -189,6 +194,9 @@ def from_dict(cls, config_dict: ConfigDict) -> "AnalysisConfig": observation_settings=obs_settings, es_module=es_settings, 
ies_module=ies_settings,
+            design_matrix_args=DesignMatrix.from_config_list(design_matrix_config_list)
+            if design_matrix_config_list is not None
+            else None,
         )
         return config
 
diff --git a/src/ert/config/design_matrix.py b/src/ert/config/design_matrix.py
new file mode 100644
index 00000000000..20b5fd8df0d
--- /dev/null
+++ b/src/ert/config/design_matrix.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List
+
+from ._option_dict import option_dict
+from .parsing import (
+    ConfigValidationError,
+    ErrorInfo,
+)
+
+
+@dataclass
+class DesignMatrix:  # parsed arguments of the DESIGN_MATRIX config keyword
+    xls_filename: Path  # first keyword argument; suffix must be .xls or .xlsx
+    design_sheet: str  # value of the DESIGN_SHEET option
+    default_sheet: str  # value of the DEFAULT_SHEET option
+
+    @classmethod  # alternate constructor from the raw keyword argument list
+    def from_config_list(cls, config_list: List[str]) -> "DesignMatrix":
+        filename = Path(config_list[0])  # element 0 is the file name
+        options = option_dict(config_list, 1)  # presumably KEY:VALUE items from index 1; helper not shown
+        design_sheet = options.get("DESIGN_SHEET")
+        default_sheet = options.get("DEFAULT_SHEET")
+        errors = []  # collected so that all problems are raised together below
+        if filename.suffix not in {  # NOTE(review): case-sensitive, ".XLSX" is rejected
+            ".xlsx",
+            ".xls",
+        }:
+            errors.append(
+                ErrorInfo(
+                    f"DESIGN_MATRIX must be of format .xls or .xlsx; is '{filename}'"
+                ).set_context(config_list)
+            )
+        if design_sheet is None:
+            errors.append(
+                ErrorInfo("Missing required DESIGN_SHEET").set_context(config_list)
+            )
+        if default_sheet is None:
+            errors.append(
+                ErrorInfo("Missing required DEFAULT_SHEET").set_context(config_list)
+            )
+        if errors:
+            raise ConfigValidationError.from_collected(errors)
+        assert design_sheet is not None  # holds here: a None value raised above
+        assert default_sheet is not None  # holds here: a None value raised above
+        return cls(
+            xls_filename=filename,
+            design_sheet=design_sheet,
+            default_sheet=default_sheet,
+        )
diff --git a/src/ert/config/parsing/config_keywords.py b/src/ert/config/parsing/config_keywords.py
index d9aefd291e2..956216b7c5f 100644
--- a/src/ert/config/parsing/config_keywords.py
+++ b/src/ert/config/parsing/config_keywords.py
@@ -27,6 +27,7 @@ class ConfigKeys(StrEnum):
     JOB_SCRIPT = "JOB_SCRIPT"
     JOBNAME = "JOBNAME"
     MAX_SUBMIT = "MAX_SUBMIT"
+
DESIGN_MATRIX = "DESIGN_MATRIX"
     NUM_REALIZATIONS = "NUM_REALIZATIONS"
     MIN_REALIZATIONS = "MIN_REALIZATIONS"
     OBS_CONFIG = "OBS_CONFIG"
diff --git a/src/ert/config/parsing/config_schema.py b/src/ert/config/parsing/config_schema.py
index 6d5d7ecbcbe..6adba3c9cba 100644
--- a/src/ert/config/parsing/config_schema.py
+++ b/src/ert/config/parsing/config_schema.py
@@ -261,6 +261,20 @@ def install_job_directory_keyword() -> SchemaItem:
     )
 
 
+def design_matrix_keyword() -> SchemaItem:  # schema entry for DESIGN_MATRIX
+    return SchemaItem(
+        kw=ConfigKeys.DESIGN_MATRIX,
+        argc_min=3,  # min == max: exactly three arguments are accepted
+        argc_max=3,
+        type_map=[
+            SchemaItemType.EXISTING_PATH,  # argument 1: the matrix file
+            SchemaItemType.STRING,  # arguments 2 and 3: KEY:VALUE options
+            SchemaItemType.STRING,
+        ],
+        multi_occurrence=False,
+    )
+
+
 class ConfigSchemaDict(SchemaItemDict):
     def check_required(
         self,
@@ -345,6 +359,7 @@ def init_user_config_schema() -> ConfigSchemaDict:
         positive_int_keyword(ConfigKeys.NUM_CPU),
         positive_int_keyword(ConfigKeys.MAX_RUNNING),
         string_keyword(ConfigKeys.REALIZATION_MEMORY),
+        design_matrix_keyword(),
         queue_system_keyword(False),
         queue_option_keyword(),
         job_script_keyword(),
diff --git a/tests/ert/unit_tests/config/test_analysis_config.py b/tests/ert/unit_tests/config/test_analysis_config.py
index 3c594d73771..ce2535e2e0a 100644
--- a/tests/ert/unit_tests/config/test_analysis_config.py
+++ b/tests/ert/unit_tests/config/test_analysis_config.py
@@ -14,13 +14,17 @@
 from ert.config.parsing import ConfigKeys, ConfigWarning
 
 
-def test_analysis_config_from_file_is_same_as_from_dict():
+def test_analysis_config_from_file_is_same_as_from_dict(monkeypatch, tmp_path):
+    with open(tmp_path / "my_design_matrix.xlsx", "w", encoding="utf-8"):
+        pass
+    monkeypatch.chdir(tmp_path)
     assert ErtConfig.from_file_contents(
         dedent(
             """
                 NUM_REALIZATIONS 10
                 MIN_REALIZATIONS 10
                 ANALYSIS_SET_VAR STD_ENKF ENKF_TRUNCATION 0.8
+                DESIGN_MATRIX my_design_matrix.xlsx DESIGN_SHEET:my_sheet DEFAULT_SHEET:my_default_sheet
                 """
         )
     ).analysis_config == AnalysisConfig.from_dict(
@@ -30,6 +34,11 @@ def
test_analysis_config_from_file_is_same_as_from_dict():
             ConfigKeys.ANALYSIS_SET_VAR: [
                 ("STD_ENKF", "ENKF_TRUNCATION", 0.8),
             ],
+            ConfigKeys.DESIGN_MATRIX: [
+                "my_design_matrix.xlsx",
+                "DESIGN_SHEET:my_sheet",
+                "DEFAULT_SHEET:my_default_sheet",
+            ],
         }
     )
 
@@ -80,6 +89,49 @@ def test_invalid_min_realization_raises_config_validation_error():
         )
 
 
+def test_invalid_design_matrix_format_raises_validation_error():
+    with pytest.raises(
+        ConfigValidationError,
+        match="DESIGN_MATRIX must be of format .xls or .xlsx; is 'my_matrix.txt'",
+    ):
+        AnalysisConfig.from_dict(
+            {
+                ConfigKeys.NUM_REALIZATIONS: 1,
+                ConfigKeys.DESIGN_MATRIX: [
+                    "my_matrix.txt",  # suffix is neither .xls nor .xlsx
+                    "DESIGN_SHEET:sheet1",
+                    "DEFAULT_SHEET:sheet2",
+                ],
+            }
+        )
+
+
+def test_design_matrix_without_design_sheet_raises_validation_error():
+    with pytest.raises(ConfigValidationError, match="Missing required DESIGN_SHEET"):
+        AnalysisConfig.from_dict(
+            {
+                ConfigKeys.DESIGN_MATRIX: [
+                    "my_matrix.xlsx",
+                    "DESIGN_:design",  # key is not DESIGN_SHEET
+                    "DEFAULT_SHEET:default",
+                ],
+            }
+        )
+
+
+def test_design_matrix_without_default_sheet_raises_validation_error():
+    with pytest.raises(ConfigValidationError, match="Missing required DEFAULT_SHEET"):
+        AnalysisConfig.from_dict(
+            {
+                ConfigKeys.DESIGN_MATRIX: [
+                    "my_matrix.xlsx",
+                    "DESIGN_SHEET:design",
+                    "DEFAULT_:default",  # key is not DEFAULT_SHEET
+                ],
+            }
+        )
+
+
 def test_invalid_min_realization_percentage_raises_config_validation_error():
     with pytest.raises(
         ConfigValidationError,