Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate derrf distribution parameters on startup #9599

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions src/ert/config/gen_kw_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,32 @@ def _check_valid_triangular_parameters(prior: PriorDict) -> None:
).set_context(self.name)
)

def _check_valid_derrf_parameters(prior: PriorDict) -> None:
Copy link
Collaborator

@oyvindeide oyvindeide Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be outside the scope for this PR, but did you consider converting this from a function to a dataclass? Something like:

@pydantic.dataclass
class DERRF:
    name: str
    steps: PositiveInt
    min: float
    max: float
    skewness: float
    width: confloat(gt=0)

    @model_validator(mode="after")
    def validate_min_max(self) -> Self:
        if not self.max > self.min
            raise ValueError(f"Max ({self.max}) must be larger than max ({self.max})
        return self
 
    def transform(self, x: float) -> float:
        """
        Bin the result of `trans_errf` with `min=0` and `max=1` to closest of `nbins`
        linearly spaced values on [0,1]. Finally map [0,1] to [min, max].
        """
        q_values = np.linspace(start=0, stop=1, num=self.steps)
        q_checks = np.linspace(start=0, stop=1, num=self.steps + 1)[1:]
        y = TransformFunction.trans_errf(x, [0, 1, self.skewness, self.width])
        bin_index = np.digitize(y, q_checks, right=True)
        y_binned = q_values[bin_index]
        result = self.min + y_binned * (self.max - self.min)
        if result > self.max or result < self.min:
            warnings.warn(
                "trans_derff suffered from catastrophic loss of precision, clamping to min,max",
                stacklevel=1,
            )
            return np.clip(result, self.min, self.max)
        if np.isnan(result):
            raise ValueError(
                "trans_derrf returns nan, check that input arguments are reasonable"
            )
        return result

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did not, but will look into it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this will require a storage migration?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I guess it will, though it would be possible to work around it for now, and only migrate once we do something like this for all parameter types. If that is the direction you are heading with rewriting parameter configs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest we merge this fix for now and then you can do this conversion later.

key = prior["key"]
dist = prior["function"]
steps, min_, max_, _, width = prior["parameters"].values()
if not (steps >= 1 and steps.is_integer()):
errors.append(
ErrorInfo(
f"NBINS {steps} must be a positive integer larger than 1"
f" for {dist} distributed parameter {key}",
).set_context(self.name)
)
if not (min_ < max_):
errors.append(
ErrorInfo(
f"The minimum {min_} must be less than the maximum {max_}"
f" for {dist} distributed parameter {key}",
).set_context(self.name)
)
if not (width > 0):
errors.append(
ErrorInfo(
f"The width {width} must be greater than 0"
f" for {dist} distributed parameter {key}",
).set_context(self.name)
)

unique_keys = set()
for prior in self.get_priors():
key = prior["key"]
Expand All @@ -240,6 +266,8 @@ def _check_valid_triangular_parameters(prior: PriorDict) -> None:
_check_non_negative_parameter("STD", prior)
elif prior["function"] == "TRIANGULAR":
_check_valid_triangular_parameters(prior)
elif prior["function"] == "DERRF":
_check_valid_derrf_parameters(prior)
elif prior["function"] in {"NORMAL", "TRUNCATED_NORMAL"}:
_check_non_negative_parameter("STD", prior)
if errors:
Expand Down
123 changes: 121 additions & 2 deletions tests/ert/unit_tests/config/test_gen_kw_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_gen_kw_config_get_priors():
f.write("KEY5 UNIFORM 2 3\n")
f.write("KEY6 DUNIF 3 0 1\n")
f.write("KEY7 ERRF 0 1 2 3\n")
f.write("KEY8 DERRF 0 1 2 3 4\n")
f.write("KEY8 DERRF 1 1 2 3 4\n")
f.write("KEY9 LOGUNIF 0 1\n")
f.write("KEY10 CONST 10\n")

Expand Down Expand Up @@ -143,7 +143,7 @@ def test_gen_kw_config_get_priors():
assert {
"key": "KEY8",
"function": "DERRF",
"parameters": {"STEPS": 0, "MIN": 1, "MAX": 2, "SKEWNESS": 3, "WIDTH": 4},
"parameters": {"STEPS": 1, "MIN": 1, "MAX": 2, "SKEWNESS": 3, "WIDTH": 4},
} in priors

assert {
Expand Down Expand Up @@ -681,3 +681,122 @@ def test_validation_triangular_distribution(
ErtConfig.from_file("config.ert")
else:
ErtConfig.from_file("config.ert")


@pytest.mark.parametrize(
"distribution, nbins, min, max, skew, width, error",
[
("DERRF", "10", "-1", "3", "-1", "2", None),
("DERRF", "100", "-10", "10", "0", "1", None),
("DERRF", "2", "-0.5", "0.5", "1", "0.1", None),
(
"DERRF",
"0",
"-1",
"3",
"-1",
"2",
"NBINS 0.0 must be a positive integer larger than 1 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"-5",
"-1",
"3",
"-1",
"2",
"NBINS -5.0 must be a positive integer larger than 1 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"1.5",
"-1",
"3",
"-1",
"2",
"NBINS 1.5 must be a positive integer larger than 1 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"10",
"3",
"-1",
"-1",
"2",
"The minimum 3.0 must be less than the maximum -1.0 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"10",
"1",
"1",
"-1",
"2",
"The minimum 1.0 must be less than the maximum 1.0 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"10",
"-1",
"3",
"-1",
"0",
"The width 0.0 must be greater than 0 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"10",
"-1",
"3",
"-1",
"-2",
"The width -2.0 must be greater than 0 for DERRF distributed parameter MY_KEYWORD",
),
(
"DERRF",
"2",
"-999999",
"999999",
"0",
"0.0001",
None,
),
(
"DERRF",
"1000",
"-0.001",
"0.001",
"0",
"0.0001",
None,
),
],
)
def test_validation_derrf_distribution(
tmpdir, distribution, nbins, min, max, skew, width, error
):
with tmpdir.as_cwd():
config = dedent(
"""
JOBNAME my_name%d
NUM_REALIZATIONS 1
GEN_KW KW_NAME template.txt kw.txt prior.txt
"""
)
with open("config.ert", "w", encoding="utf-8") as fh:
fh.writelines(config)
with open("template.txt", "w", encoding="utf-8") as fh:
fh.writelines("MY_KEYWORD <MY_KEYWORD>")
with open("prior.txt", "w", encoding="utf-8") as fh:
fh.writelines(
f"MY_KEYWORD {distribution} {nbins} {min} {max} {skew} {width}"
)

if error:
with pytest.raises(
ConfigValidationError,
match=error,
):
ErtConfig.from_file("config.ert")
else:
ErtConfig.from_file("config.ert")
Loading