From e3ce0c0e531fd8051ea92b71357cf9d413d9e982 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Sun, 16 Jun 2024 09:40:16 -0700 Subject: [PATCH] Improve validations for derived metrics (#294) Resolves #270 ### Description Improve validations for derived metrics. - If there were no input metrics set, previously users would run into an `AssertionError` that interrupts the semantic manifest validation process. This changes that to be a more user-friendly validation error. - Add a validation warning to ensure that `expr` is set and that it references all the input metrics. Note: The first commit here was previously merged and then reverted because adding a `ValidationError` would be a breaking change. This PR updates it to be a `ValidationWarning`. ### Checklist - [x] I have read [the contributing guide](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md) and understand what's expected of me - [x] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) - [x] This PR includes tests, or tests are not required/relevant for this PR - [x] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-semantic-interfaces/blob/main/CONTRIBUTING.md#adding-a-changelog-entry) --- .../unreleased/Fixes-20240524-092249.yaml | 7 +++ .../validations/metrics.py | 62 ++++++++++++++++--- tests/validations/test_metrics.py | 32 ++++++++-- 3 files changed, 87 insertions(+), 14 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240524-092249.yaml diff --git a/.changes/unreleased/Fixes-20240524-092249.yaml b/.changes/unreleased/Fixes-20240524-092249.yaml new file mode 100644 index 00000000..1a4b90ba --- /dev/null +++ b/.changes/unreleased/Fixes-20240524-092249.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Improve validations for derived metrics. Improve error messaging if input metrics + are not set. Validate that expr is set and references all input metrics. +time: 2024-05-24T09:22:49.247603-07:00 +custom: + Author: courtneyholcomb + Issue: "270" diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index 4533a1df..4397768a 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -19,6 +19,7 @@ SemanticManifestValidationRule, ValidationError, ValidationIssue, + ValidationWarning, generate_exception_issue, validate_safely, ) @@ -87,8 +88,9 @@ def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]: file_context=FileContext.from_metadata(metadata=metric.metadata), metric=MetricModelReference(metric_name=metric.name), ) - used_names = {input_metric.name for input_metric in metric.input_metrics} - for input_metric in metric.input_metrics: + input_metrics = metric.type_params.metrics or [] + used_names = {input_metric.name for input_metric in input_metrics} + for input_metric in input_metrics: if input_metric.alias: issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias, metric_context) if input_metric.alias in used_names: @@ -109,15 +111,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V all_metrics = {m.name for m in semantic_manifest.metrics} for metric in semantic_manifest.metrics: + metric_context = MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ) if metric.type == MetricType.DERIVED: - for input_metric in metric.input_metrics: + if not metric.type_params.metrics: + issues.append( + ValidationError( + context=metric_context, + message=f"No input metrics found for derived metric '{metric.name}'. " + "Please add metrics to type_params.metrics.", + ) + ) + for input_metric in metric.type_params.metrics or []: if input_metric.name not in all_metrics: issues.append( ValidationError( - context=MetricContext( - file_context=FileContext.from_metadata(metadata=metric.metadata), - metric=MetricModelReference(metric_name=metric.name), - ), + context=metric_context, message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not " "exist as a configured metric in the model.", ) @@ -129,7 +140,7 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]: issues: List[ValidationIssue] = [] - for input_metric in metric.input_metrics or []: + for input_metric in metric.type_params.metrics or []: if input_metric.offset_window and input_metric.offset_to_grain: issues.append( ValidationError( @@ -144,6 +155,40 @@ def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]: return issues + @staticmethod + @validate_safely(whats_being_done="checking that the expr field uses the input metrics") + def _validate_expr(metric: Metric) -> List[ValidationIssue]: + issues: List[ValidationIssue] = [] + + if metric.type == MetricType.DERIVED: + if not metric.type_params.expr: + issues.append( + ValidationWarning( + context=MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ), + message=f"No `expr` set for derived metric {metric.name}. " + "Please add an `expr` that references all input metrics.", + ) + ) + else: + for input_metric in metric.type_params.metrics or []: + name = input_metric.alias or input_metric.name + if name not in metric.type_params.expr: + issues.append( + ValidationWarning( + context=MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ), + message=f"Input metric '{name}' is not used in `expr`: '{metric.type_params.expr}' for " + f"derived metric '{metric.name}'. Please update the `expr` or remove the input metric.", + ) + ) + + return issues + @staticmethod @validate_safely( whats_being_done="running model validation ensuring derived metrics properties are configured properly" @@ -155,6 +200,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati for metric in semantic_manifest.metrics or []: issues += DerivedMetricRule._validate_alias_collision(metric=metric) issues += DerivedMetricRule._validate_time_offset_params(metric=metric) + issues += DerivedMetricRule._validate_expr(metric=metric) return issues diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index da6bbf70..e6c42b08 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -266,6 +266,21 @@ def test_derived_metric() -> None: # noqa: D expr="notexist * 2", metrics=[PydanticMetricInput(name="notexist")] ), ), + metric_with_guaranteed_meta( + name="no_expr", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]), + ), + metric_with_guaranteed_meta( + name="input_metric_not_in_expr", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams(expr="x", metrics=[PydanticMetricInput(name="random_metric")]), + ), + metric_with_guaranteed_meta( + name="no_input_metrics", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams(expr="x"), + ), metric_with_guaranteed_meta( name="has_valid_time_window_params", type=MetricType.DERIVED, @@ -299,13 +314,18 @@ def test_derived_metric() -> None: # noqa: D project_configuration=EXAMPLE_PROJECT_CONFIGURATION, ) ) - build_issues = validation_results.errors - assert len(build_issues) == 3 - expected_substr1 = "is already being used. Please choose another alias" - expected_substr2 = "does not exist as a configured metric in the model" - expected_substr3 = "Both offset_window and offset_to_grain set" + build_issues = validation_results.all_issues + assert len(build_issues) == 6 + expected_substrings = [ + "is already being used. Please choose another alias", + "does not exist as a configured metric in the model", + "Both offset_window and offset_to_grain set", + "is not used in `expr`", + "No input metrics found for derived metric", + "No `expr` set for derived metric", + ] missing_error_strings = set() - for expected_str in [expected_substr1, expected_substr2, expected_substr3]: + for expected_str in expected_substrings: if not any(actual_str.as_readable_str().find(expected_str) != -1 for actual_str in build_issues): missing_error_strings.add(expected_str) assert len(missing_error_strings) == 0, (