Improve validations for derived metrics #294

Merged · 3 commits · Jun 16, 2024
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240524-092249.yaml
@@ -0,0 +1,7 @@
kind: Fixes
body: Improve validations for derived metrics. Improve error messaging if input metrics
are not set. Validate that expr is set and references all input metrics.
time: 2024-05-24T09:22:49.247603-07:00
custom:
Author: courtneyholcomb
Issue: "270"
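
For illustration, the derived-metric shapes these new checks flag look roughly like the sketch below, built with the Pydantic classes the updated test constructs. This is a minimal sketch rather than part of the PR; the import paths and the exact set of required PydanticMetric fields are assumptions.

# Sketch only: import paths and required PydanticMetric fields are assumptions.
from dbt_semantic_interfaces.implementations.metric import (
    PydanticMetric,
    PydanticMetricInput,
    PydanticMetricTypeParams,
)
from dbt_semantic_interfaces.type_enums import MetricType

# Flagged by the new error: a derived metric with no input metrics at all.
no_input_metrics = PydanticMetric(
    name="no_input_metrics",
    type=MetricType.DERIVED,
    type_params=PydanticMetricTypeParams(expr="x"),
)

# Flagged by the new warning: input metrics are set, but no `expr` combines them.
no_expr = PydanticMetric(
    name="no_expr",
    type=MetricType.DERIVED,
    type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]),
)

# Flagged by the new warning: `expr` never references the input metric (or its alias).
input_metric_not_in_expr = PydanticMetric(
    name="input_metric_not_in_expr",
    type=MetricType.DERIVED,
    type_params=PydanticMetricTypeParams(expr="x", metrics=[PydanticMetricInput(name="random_metric")]),
)
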
62 changes: 54 additions & 8 deletions dbt_semantic_interfaces/validations/metrics.py
@@ -19,6 +19,7 @@
SemanticManifestValidationRule,
ValidationError,
ValidationIssue,
ValidationWarning,
generate_exception_issue,
validate_safely,
)
@@ -87,8 +88,9 @@ def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]:
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)
used_names = {input_metric.name for input_metric in metric.input_metrics}
for input_metric in metric.input_metrics:
input_metrics = metric.type_params.metrics or []
used_names = {input_metric.name for input_metric in input_metrics}
for input_metric in input_metrics:
if input_metric.alias:
issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias, metric_context)
if input_metric.alias in used_names:
@@ -109,15 +111,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V

all_metrics = {m.name for m in semantic_manifest.metrics}
for metric in semantic_manifest.metrics:
metric_context = MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)
if metric.type == MetricType.DERIVED:
for input_metric in metric.input_metrics:
if not metric.type_params.metrics:
issues.append(
ValidationError(
context=metric_context,
message=f"No input metrics found for derived metric '{metric.name}'. "
"Please add metrics to type_params.metrics.",
)
)
for input_metric in metric.type_params.metrics or []:
if input_metric.name not in all_metrics:
issues.append(
ValidationError(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
context=metric_context,
message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not "
"exist as a configured metric in the model.",
)
@@ -129,7 +140,7 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V
def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
issues: List[ValidationIssue] = []

for input_metric in metric.input_metrics or []:
for input_metric in metric.type_params.metrics or []:
if input_metric.offset_window and input_metric.offset_to_grain:
issues.append(
ValidationError(
@@ -144,6 +155,40 @@ def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:

return issues

@staticmethod
@validate_safely(whats_being_done="checking that the expr field uses the input metrics")
def _validate_expr(metric: Metric) -> List[ValidationIssue]:
issues: List[ValidationIssue] = []

if metric.type == MetricType.DERIVED:
if not metric.type_params.expr:
issues.append(
ValidationWarning(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
message=f"No `expr` set for derived metric {metric.name}. "
"Please add an `expr` that references all input metrics.",
)
)
else:
for input_metric in metric.type_params.metrics or []:
name = input_metric.alias or input_metric.name
if name not in metric.type_params.expr:
issues.append(
ValidationWarning(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
message=f"Input metric '{name}' is not used in `expr`: '{metric.type_params.expr}' for "
f"derived metric '{metric.name}'. Please update the `expr` or remove the input metric.",
)
)

return issues

@staticmethod
@validate_safely(
whats_being_done="running model validation ensuring derived metrics properties are configured properly"
@@ -155,6 +200,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
for metric in semantic_manifest.metrics or []:
issues += DerivedMetricRule._validate_alias_collision(metric=metric)
issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
issues += DerivedMetricRule._validate_expr(metric=metric)
return issues


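As a minimal usage sketch (not part of the PR), the new check can be exercised directly on such a metric. The validations module path matches this diff; the Pydantic import paths are again assumptions.

# Sketch: run the new expr check directly on a derived metric with no `expr`.
from dbt_semantic_interfaces.implementations.metric import (
    PydanticMetric,
    PydanticMetricInput,
    PydanticMetricTypeParams,
)
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.validations.metrics import DerivedMetricRule

metric = PydanticMetric(
    name="no_expr",
    type=MetricType.DERIVED,
    type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]),
)

# _validate_expr is a @staticmethod, so it can be called on the class directly.
issues = DerivedMetricRule._validate_expr(metric=metric)
for issue in issues:
    # as_readable_str() is the same accessor the updated test uses to match messages.
    print(issue.as_readable_str())
# Expected to include: "No `expr` set for derived metric no_expr. Please add an
# `expr` that references all input metrics."
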
32 changes: 26 additions & 6 deletions tests/validations/test_metrics.py
@@ -266,6 +266,21 @@ def test_derived_metric() -> None: # noqa: D
expr="notexist * 2", metrics=[PydanticMetricInput(name="notexist")]
),
),
metric_with_guaranteed_meta(
name="no_expr",
type=MetricType.DERIVED,
type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]),
),
metric_with_guaranteed_meta(
name="input_metric_not_in_expr",
type=MetricType.DERIVED,
type_params=PydanticMetricTypeParams(expr="x", metrics=[PydanticMetricInput(name="random_metric")]),
),
metric_with_guaranteed_meta(
name="no_input_metrics",
type=MetricType.DERIVED,
type_params=PydanticMetricTypeParams(expr="x"),
),
metric_with_guaranteed_meta(
name="has_valid_time_window_params",
type=MetricType.DERIVED,
@@ -299,13 +314,18 @@ def test_derived_metric() -> None: # noqa: D
project_configuration=EXAMPLE_PROJECT_CONFIGURATION,
)
)
build_issues = validation_results.errors
assert len(build_issues) == 3
expected_substr1 = "is already being used. Please choose another alias"
expected_substr2 = "does not exist as a configured metric in the model"
expected_substr3 = "Both offset_window and offset_to_grain set"
build_issues = validation_results.all_issues
assert len(build_issues) == 6
expected_substrings = [
"is already being used. Please choose another alias",
"does not exist as a configured metric in the model",
"Both offset_window and offset_to_grain set",
"is not used in `expr`",
"No input metrics found for derived metric",
"No `expr` set for derived metric",
]
missing_error_strings = set()
for expected_str in [expected_substr1, expected_substr2, expected_substr3]:
for expected_str in expected_substrings:
if not any(actual_str.as_readable_str().find(expected_str) != -1 for actual_str in build_issues):
missing_error_strings.add(expected_str)
assert len(missing_error_strings) == 0, (