From f9b307de4208784bbae695ec389128637ad3a301 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Fri, 24 May 2024 09:50:07 -0700 Subject: [PATCH 1/3] Improve validations for derived metrics (#279) Improve validations for derived metrics. - If there were no input metrics set, previously users would run into an `AssertionError` that interrupts the semantic manifest validation process. This changes that to be a more user-friendly validation error. - Add a validation to ensure that `expr` is set and that it references all the input metrics. --- .../unreleased/Fixes-20240524-092249.yaml | 7 +++ .github/CODEOWNERS | 2 +- .../validations/metrics.py | 61 ++++++++++++++++--- tests/validations/test_metrics.py | 30 +++++++-- 4 files changed, 86 insertions(+), 14 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240524-092249.yaml diff --git a/.changes/unreleased/Fixes-20240524-092249.yaml b/.changes/unreleased/Fixes-20240524-092249.yaml new file mode 100644 index 00000000..1a4b90ba --- /dev/null +++ b/.changes/unreleased/Fixes-20240524-092249.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Improve validations for derived metrics. Improve error messaging if input metrics + are not set. Validate that expr is set and references all input metrics. +time: 2024-05-24T09:22:49.247603-07:00 +custom: + Author: courtneyholcomb + Issue: "270" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index af7c38ea..d35ea0e3 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -13,4 +13,4 @@ # Parsing, validations, and protocols -/dbt_semantic_interfaces/** @dbt-labs/metricflow-admins @QMalcolm @graciegoheen +/dbt_semantic_interfaces/** @dbt-labs/metricflow-admins @QMalcolm @graciegoheen @dbt-labs/semantic-layer diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index 4533a1df..5221c7af 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -87,8 +87,9 @@ def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]: file_context=FileContext.from_metadata(metadata=metric.metadata), metric=MetricModelReference(metric_name=metric.name), ) - used_names = {input_metric.name for input_metric in metric.input_metrics} - for input_metric in metric.input_metrics: + input_metrics = metric.type_params.metrics or [] + used_names = {input_metric.name for input_metric in input_metrics} + for input_metric in input_metrics: if input_metric.alias: issues += UniqueAndValidNameRule.check_valid_name(input_metric.alias, metric_context) if input_metric.alias in used_names: @@ -109,15 +110,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V all_metrics = {m.name for m in semantic_manifest.metrics} for metric in semantic_manifest.metrics: + metric_context = MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ) if metric.type == MetricType.DERIVED: - for input_metric in metric.input_metrics: + if not metric.type_params.metrics: + issues.append( + ValidationError( + context=metric_context, + message=f"No input metrics found for derived metric '{metric.name}'. " + "Please add metrics to type_params.metrics.", + ) + ) + for input_metric in metric.type_params.metrics or []: if input_metric.name not in all_metrics: issues.append( ValidationError( - context=MetricContext( - file_context=FileContext.from_metadata(metadata=metric.metadata), - metric=MetricModelReference(metric_name=metric.name), - ), + context=metric_context, message=f"For metric: {metric.name}, input metric: '{input_metric.name}' does not " "exist as a configured metric in the model.", ) @@ -129,7 +139,7 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]: issues: List[ValidationIssue] = [] - for input_metric in metric.input_metrics or []: + for input_metric in metric.type_params.metrics or []: if input_metric.offset_window and input_metric.offset_to_grain: issues.append( ValidationError( @@ -144,6 +154,40 @@ def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]: return issues + @staticmethod + @validate_safely(whats_being_done="checking that the expr field uses the input metrics") + def _validate_expr(metric: Metric) -> List[ValidationIssue]: + issues: List[ValidationIssue] = [] + + if metric.type == MetricType.DERIVED: + if not metric.type_params.expr: + issues.append( + ValidationError( + context=MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ), + message=f"No expr set for derived metric {metric.name}. " + "Please add an expr that references all input metrics.", + ) + ) + else: + for input_metric in metric.type_params.metrics or []: + name = input_metric.alias or input_metric.name + if name not in metric.type_params.expr: + issues.append( + ValidationError( + context=MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ), + message=f"Input metric '{name}' is not used in expr: '{metric.type_params.expr}' for " + f"derived metric '{metric.name}'. Please update the expr or remove the input metric.", + ) + ) + + return issues + @staticmethod @validate_safely( whats_being_done="running model validation ensuring derived metrics properties are configured properly" @@ -155,6 +199,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati for metric in semantic_manifest.metrics or []: issues += DerivedMetricRule._validate_alias_collision(metric=metric) issues += DerivedMetricRule._validate_time_offset_params(metric=metric) + issues += DerivedMetricRule._validate_expr(metric=metric) return issues diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index da6bbf70..7a1d320d 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -266,6 +266,21 @@ def test_derived_metric() -> None: # noqa: D expr="notexist * 2", metrics=[PydanticMetricInput(name="notexist")] ), ), + metric_with_guaranteed_meta( + name="no_expr", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams(metrics=[PydanticMetricInput(name="random_metric")]), + ), + metric_with_guaranteed_meta( + name="input_metric_not_in_expr", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams(expr="x", metrics=[PydanticMetricInput(name="random_metric")]), + ), + metric_with_guaranteed_meta( + name="no_input_metrics", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams(expr="x"), + ), metric_with_guaranteed_meta( name="has_valid_time_window_params", type=MetricType.DERIVED, @@ -300,12 +315,17 @@ def test_derived_metric() -> None: # noqa: D ) ) build_issues = validation_results.errors - assert len(build_issues) == 3 - expected_substr1 = "is already being used. Please choose another alias" - expected_substr2 = "does not exist as a configured metric in the model" - expected_substr3 = "Both offset_window and offset_to_grain set" + assert len(build_issues) == 6 + expected_substrings = [ + "is already being used. Please choose another alias", + "does not exist as a configured metric in the model", + "Both offset_window and offset_to_grain set", + "is not used in expr", + "No input metrics found for derived metric", + "No expr set for derived metric", + ] missing_error_strings = set() - for expected_str in [expected_substr1, expected_substr2, expected_substr3]: + for expected_str in expected_substrings: if not any(actual_str.as_readable_str().find(expected_str) != -1 for actual_str in build_issues): missing_error_strings.add(expected_str) assert len(missing_error_strings) == 0, ( From c384353a64b8a4f5098a714626f914733d566a06 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Sun, 16 Jun 2024 09:19:59 -0700 Subject: [PATCH 2/3] Convert errors to warnings to avoid breaking change --- dbt_semantic_interfaces/validations/metrics.py | 13 +++++++------ tests/validations/test_metrics.py | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index 5221c7af..4397768a 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -19,6 +19,7 @@ SemanticManifestValidationRule, ValidationError, ValidationIssue, + ValidationWarning, generate_exception_issue, validate_safely, ) @@ -162,13 +163,13 @@ def _validate_expr(metric: Metric) -> List[ValidationIssue]: if metric.type == MetricType.DERIVED: if not metric.type_params.expr: issues.append( - ValidationError( + ValidationWarning( context=MetricContext( file_context=FileContext.from_metadata(metadata=metric.metadata), metric=MetricModelReference(metric_name=metric.name), ), - message=f"No expr set for derived metric {metric.name}. " - "Please add an expr that references all input metrics.", + message=f"No `expr` set for derived metric {metric.name}. " + "Please add an `expr` that references all input metrics.", ) ) else: @@ -176,13 +177,13 @@ def _validate_expr(metric: Metric) -> List[ValidationIssue]: name = input_metric.alias or input_metric.name if name not in metric.type_params.expr: issues.append( - ValidationError( + ValidationWarning( context=MetricContext( file_context=FileContext.from_metadata(metadata=metric.metadata), metric=MetricModelReference(metric_name=metric.name), ), - message=f"Input metric '{name}' is not used in expr: '{metric.type_params.expr}' for " - f"derived metric '{metric.name}'. Please update the expr or remove the input metric.", + message=f"Input metric '{name}' is not used in `expr`: '{metric.type_params.expr}' for " + f"derived metric '{metric.name}'. Please update the `expr` or remove the input metric.", ) ) diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index 7a1d320d..e6c42b08 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -314,15 +314,15 @@ def test_derived_metric() -> None: # noqa: D project_configuration=EXAMPLE_PROJECT_CONFIGURATION, ) ) - build_issues = validation_results.errors + build_issues = validation_results.all_issues assert len(build_issues) == 6 expected_substrings = [ "is already being used. Please choose another alias", "does not exist as a configured metric in the model", "Both offset_window and offset_to_grain set", - "is not used in expr", + "is not used in `expr`", "No input metrics found for derived metric", - "No expr set for derived metric", + "No `expr` set for derived metric", ] missing_error_strings = set() for expected_str in expected_substrings: From ef10d1990e21b0df3e03f93fac9032202826d140 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Sun, 16 Jun 2024 09:21:58 -0700 Subject: [PATCH 3/3] Remove unneeded codeowners change --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d35ea0e3..af7c38ea 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -13,4 +13,4 @@ # Parsing, validations, and protocols -/dbt_semantic_interfaces/** @dbt-labs/metricflow-admins @QMalcolm @graciegoheen @dbt-labs/semantic-layer +/dbt_semantic_interfaces/** @dbt-labs/metricflow-admins @QMalcolm @graciegoheen