From a968ffd7938fda6e021149a0d95b2750a6c1a821 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Fri, 15 Nov 2024 09:51:27 -0800 Subject: [PATCH 1/8] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 96439504..dd762796 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dbt-semantic-interfaces" -version = "0.8.1" +version = "0.8.2" description = 'The shared semantic layer definitions that dbt-core and MetricFlow use' readme = "README.md" requires-python = ">=3.8" From 2083a249889252823b097098cb03471f2db0d00e Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 25 Nov 2024 16:35:11 -0800 Subject: [PATCH 2/8] Add more case sensitivity tests --- tests/parsing/test_metric_parsing.py | 112 +++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 13 deletions(-) diff --git a/tests/parsing/test_metric_parsing.py b/tests/parsing/test_metric_parsing.py index 79064519..ad120fb0 100644 --- a/tests/parsing/test_metric_parsing.py +++ b/tests/parsing/test_metric_parsing.py @@ -13,6 +13,9 @@ parse_yaml_files_to_semantic_manifest, ) from dbt_semantic_interfaces.parsing.objects import YamlConfigFile +from dbt_semantic_interfaces.transformations.semantic_manifest_transformer import ( + PydanticSemanticManifestTransformer, +) from dbt_semantic_interfaces.type_enums import ( ConversionCalculationType, MetricType, @@ -202,8 +205,8 @@ def test_ratio_metric_input_measure_object_parsing() -> None: assert metric.type_params.denominator == PydanticMetricInput(name="denominator_metric_from_object") -def test_cumulative_window_metric_parsing() -> None: - """Test for parsing a metric specification with a cumulative window.""" +def test_cumulative_window_old_metric_parsing() -> None: + """Test for parsing a metric specification with a cumulative window using old field.""" yaml_contents = textwrap.dedent( """\ metric: @@ -227,8 +230,38 @@ def test_cumulative_window_metric_parsing() -> None: assert metric.type_params.window == PydanticMetricTimeWindow(count=7, granularity=TimeGranularity.DAY.value) -def test_grain_to_date_metric_parsing() -> None: - """Test for parsing a metric specification with the grain to date cumulative setting.""" +def test_cumulative_window_metric_parsing() -> None: + """Test for parsing a metric specification with a cumulative window.""" + yaml_contents = textwrap.dedent( + """\ + metric: + name: cumulative_test + type: cumulative + type_params: + measure: + name: cumulative_measure + cumulative_type_params: + window: "7 Days" + """ + ) + file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) + + build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) + + assert len(build_result.semantic_manifest.metrics) == 1 + metric = build_result.semantic_manifest.metrics[0] + assert metric.name == "cumulative_test" + assert metric.type is MetricType.CUMULATIVE + assert metric.type_params.measure == PydanticMetricInputMeasure(name="cumulative_measure") + assert ( + metric.type_params.cumulative_type_params + and metric.type_params.cumulative_type_params.window + == PydanticMetricTimeWindow(count=7, granularity=TimeGranularity.DAY.value) + ) + + +def test_grain_to_date_metric_old_parsing() -> None: + """Test for parsing a metric specification with grain to date using the old field.""" yaml_contents = textwrap.dedent( """\ metric: @@ -249,10 +282,38 @@ def test_grain_to_date_metric_parsing() -> None: assert metric.name == "grain_to_date_test" assert metric.type is MetricType.CUMULATIVE assert metric.type_params.measure == PydanticMetricInputMeasure(name="cumulative_measure") - assert metric.type_params.window is None assert metric.type_params.grain_to_date is TimeGranularity.WEEK +def test_grain_to_date_metric_parsing() -> None: + """Test for parsing a metric specification with the grain to date cumulative setting.""" + yaml_contents = textwrap.dedent( + """\ + metric: + name: grain_to_date_test + type: cumulative + type_params: + measure: + name: cumulative_measure + cumulative_type_params: + grain_to_date: "weEk" + """ + ) + file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) + + build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) + + assert len(build_result.semantic_manifest.metrics) == 1 + metric = build_result.semantic_manifest.metrics[0] + assert metric.name == "grain_to_date_test" + assert metric.type is MetricType.CUMULATIVE + assert metric.type_params.measure == PydanticMetricInputMeasure(name="cumulative_measure") + assert ( + metric.type_params.cumulative_type_params + and metric.type_params.cumulative_type_params.grain_to_date == TimeGranularity.WEEK.value + ) + + def test_derived_metric_offset_window_parsing() -> None: """Test for parsing a derived metric with an offset window.""" yaml_contents = textwrap.dedent( @@ -265,7 +326,7 @@ def test_derived_metric_offset_window_parsing() -> None: metrics: - name: bookings - name: bookings - offset_window: 14 days + offset_window: 14 dAYs alias: bookings_2_weeks_ago """ ) @@ -287,10 +348,33 @@ def test_derived_metric_offset_window_parsing() -> None: assert metric.type_params.expr == "bookings / bookings_2_weeks_ago" -def test_derive_metric_offset_to_grain_parsing() -> None: +def test_derived_metric_offset_to_grain_parsing() -> None: """Test for parsing a derived metric with an offset to grain to date.""" yaml_contents = textwrap.dedent( """\ + semantic_model: + name: sample_semantic_model + node_relation: + schema_name: some_schema + alias: source_table + defaults: + agg_time_dimension: ds + entities: + - name: example_entity + type: primary + role: test_role + expr: example_id + measures: + - name: bookings + agg: sum + expr: 1 + create_metric: true + dimensions: + - name: ds + type: time + type_params: + time_granularity: day + --- metric: name: derived_offset_to_grain_test type: derived @@ -299,7 +383,7 @@ def test_derive_metric_offset_to_grain_parsing() -> None: metrics: - name: bookings - name: bookings - offset_to_grain: month + offset_to_grain: moNTh alias: bookings_at_start_of_month """ ) @@ -307,10 +391,12 @@ def test_derive_metric_offset_to_grain_parsing() -> None: build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) - assert len(build_result.issues.all_issues) == 0 - assert len(build_result.semantic_manifest.metrics) == 1 - metric = build_result.semantic_manifest.metrics[0] - assert metric.name == "derived_offset_to_grain_test" + # Apply transformations to ensure lowercasing + manifest = PydanticSemanticManifestTransformer().transform(build_result.semantic_manifest) + + assert len(manifest.metrics) == 2 + metric = [m for m in manifest.metrics if m.name == "derived_offset_to_grain_test"][0] + assert metric assert metric.type is MetricType.DERIVED assert metric.type_params.metrics and len(metric.type_params.metrics) == 2 metric1, metric2 = metric.type_params.metrics @@ -430,7 +516,7 @@ def test_conversion_metric_parsing() -> None: conversion_type_params: base_measure: opportunity conversion_measure: conversions - window: 7 days + window: 7 dAys entity: user """ ) From 00abab69a8b4537abcc0ea9e544547e99a3c27a2 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 25 Nov 2024 16:35:43 -0800 Subject: [PATCH 3/8] Ensure lowercasing for grain_to_date and offset_to_grain --- .../implementations/metric.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/dbt_semantic_interfaces/implementations/metric.py b/dbt_semantic_interfaces/implementations/metric.py index 4b596f0a..3fe6b512 100644 --- a/dbt_semantic_interfaces/implementations/metric.py +++ b/dbt_semantic_interfaces/implementations/metric.py @@ -1,6 +1,7 @@ from __future__ import annotations -from typing import Dict, List, Optional, Sequence, Set +from copy import deepcopy +from typing import Any, Dict, List, Optional, Sequence, Set from typing_extensions import override @@ -222,6 +223,26 @@ def _implements_protocol(self) -> Metric: # noqa: D config: Optional[PydanticSemanticLayerElementConfig] time_granularity: Optional[str] = None + @classmethod + def parse_obj(cls, input: Any) -> PydanticMetric: + data = deepcopy(input) + + # Ensure grain_to_date is lowercased + type_params = data.get("type_params", {}) + grain_to_date = type_params.get("cumulative_type_params", {}).get("grain_to_date") + if isinstance(grain_to_date, str): + data["type_params"]["cumulative_type_params"]["grain_to_date"] = grain_to_date.lower() + + # Ensure offset_to_grain is lowercased + input_metrics = type_params.get("metrics", []) + if input_metrics: + for input_metric in input_metrics: + offset_to_grain = input_metric.get("offset_to_grain") + if offset_to_grain and isinstance(offset_to_grain, str): + input_metric["offset_to_grain"] = offset_to_grain.lower() + + return super(HashableBaseModel, cls).parse_obj(data) + @property def input_measures(self) -> Sequence[PydanticMetricInputMeasure]: """Return the complete list of input measure configurations for this metric.""" From 9151740c2e47b5e2f7c2b6370ba66df5d5602c0d Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 25 Nov 2024 18:14:09 -0800 Subject: [PATCH 4/8] Refactor validation for metric time windows --- .../implementations/metric.py | 27 +---- .../remove_plural_from_window_granularity.py | 23 ++-- .../validations/metrics.py | 100 +++++++++++------- tests/parsing/test_metric_parsing.py | 56 +++++++--- tests/validations/test_metrics.py | 31 ++---- 5 files changed, 131 insertions(+), 106 deletions(-) diff --git a/dbt_semantic_interfaces/implementations/metric.py b/dbt_semantic_interfaces/implementations/metric.py index 3fe6b512..b693d87d 100644 --- a/dbt_semantic_interfaces/implementations/metric.py +++ b/dbt_semantic_interfaces/implementations/metric.py @@ -84,7 +84,7 @@ def _from_yaml_value(cls, input: PydanticParseableValueType) -> PydanticMetricTi The MetricTimeWindow is always expected to be provided as a string in user-defined YAML configs. """ if isinstance(input, str): - return PydanticMetricTimeWindow.parse(window=input.lower(), custom_granularity_names=(), strict=False) + return PydanticMetricTimeWindow.parse(window=input.lower()) else: raise ValueError( f"MetricTimeWindow inputs from model configs are expected to always be of type string, but got " @@ -102,12 +102,8 @@ def window_string(self) -> str: return f"{self.count} {self.granularity}" @staticmethod - def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = True) -> PydanticMetricTimeWindow: - """Returns window values if parsing succeeds, None otherwise. - - If strict=True, then the granularity in the window must exist as a valid granularity. - Use strict=True for when you have all valid granularities, otherwise use strict=False. - """ + def parse(window: str) -> PydanticMetricTimeWindow: + """Returns window values if parsing succeeds, None otherwise.""" parts = window.lower().split(" ") if len(parts) != 2: raise ParsingException( @@ -116,22 +112,6 @@ def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = T ) granularity = parts[1] - - valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set( - c.lower() for c in custom_granularity_names - ) - - # if we switched to python 3.9 this could just be `granularity = parts[0].removesuffix('s') - if granularity.endswith("s") and granularity[:-1] in valid_time_granularities: - # months -> month - granularity = granularity[:-1] - - if strict and granularity not in valid_time_granularities: - raise ParsingException( - f"Invalid time granularity {granularity} in metric window string: ({window})", - ) - # If not strict and not standard granularity, it may be a custom grain, so validations happens later - count = parts[0] if not count.isdigit(): raise ParsingException(f"Invalid count ({count}) in cumulative metric window string: ({window})") @@ -225,6 +205,7 @@ def _implements_protocol(self) -> Metric: # noqa: D @classmethod def parse_obj(cls, input: Any) -> PydanticMetric: + """Adds custom parsing to the default method.""" data = deepcopy(input) # Ensure grain_to_date is lowercased diff --git a/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py b/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py index 9e5fb5f4..de281823 100644 --- a/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py +++ b/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py @@ -12,7 +12,7 @@ from dbt_semantic_interfaces.transformations.transform_rule import ( SemanticManifestTransformRule, ) -from dbt_semantic_interfaces.type_enums import MetricType +from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity class RemovePluralFromWindowGranularityRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]): @@ -33,12 +33,18 @@ def _update_metric( semantic_manifest: PydanticSemanticManifest, metric_name: str, custom_granularity_names: Sequence[str] ) -> None: """Mutates all the MetricTimeWindow by reparsing to remove the trailing 's'.""" + valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set( + c.lower() for c in custom_granularity_names + ) - def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow: + def trim_trailing_s(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow: """Reparse the window to remove the trailing 's'.""" - return PydanticMetricTimeWindow.parse( - window=window.window_string, custom_granularity_names=custom_granularity_names - ) + granularity = window.granularity + if granularity.endswith("s") and granularity[:-1] in valid_time_granularities: + # months -> month + granularity = granularity[:-1] + window.granularity = granularity + return window matched_metric = next( iter((metric for metric in semantic_manifest.metrics if metric.name == metric_name)), None @@ -49,22 +55,23 @@ def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow matched_metric.type_params.cumulative_type_params and matched_metric.type_params.cumulative_type_params.window ): - matched_metric.type_params.cumulative_type_params.window = reparse_window( + matched_metric.type_params.cumulative_type_params.window = trim_trailing_s( matched_metric.type_params.cumulative_type_params.window ) + elif matched_metric.type is MetricType.CONVERSION: if ( matched_metric.type_params.conversion_type_params and matched_metric.type_params.conversion_type_params.window ): - matched_metric.type_params.conversion_type_params.window = reparse_window( + matched_metric.type_params.conversion_type_params.window = trim_trailing_s( matched_metric.type_params.conversion_type_params.window ) elif matched_metric.type is MetricType.DERIVED or matched_metric.type is MetricType.RATIO: for input_metric in matched_metric.input_metrics: if input_metric.offset_window: - input_metric.offset_window = reparse_window(input_metric.offset_window) + input_metric.offset_window = trim_trailing_s(input_metric.offset_window) elif matched_metric.type is MetricType.SIMPLE: pass else: diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index aef07ed2..c069f742 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -1,16 +1,12 @@ -import traceback from typing import Dict, Generic, List, Optional, Sequence -from dbt_semantic_interfaces.errors import ParsingException -from dbt_semantic_interfaces.implementations.metric import ( - PydanticMetric, - PydanticMetricTimeWindow, -) +from dbt_semantic_interfaces.implementations.metric import PydanticMetric from dbt_semantic_interfaces.protocols import ( ConversionTypeParams, Dimension, Metric, MetricInputMeasure, + MetricTimeWindow, SemanticManifest, SemanticManifestT, SemanticModel, @@ -46,9 +42,9 @@ class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]): """Checks that cumulative metrics are configured properly.""" - @staticmethod + @classmethod @validate_safely(whats_being_done="running model validation ensuring cumulative metrics are valid") - def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D + def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] custom_granularity_names = [ @@ -109,19 +105,31 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati ) if window: - try: - # TODO: Should not call an implementation class. - PydanticMetricTimeWindow.parse( - window=window.window_string, custom_granularity_names=custom_granularity_names - ) - except ParsingException as e: - issues.append( - ValidationError( - context=metric_context, - message="".join(traceback.format_exception_only(type(e), value=e)), - extra_detail="".join(traceback.format_tb(e.__traceback__)), - ) - ) + issues += cls.validate_metric_time_window( + metric_context=metric_context, window=window, custom_granularities=custom_granularity_names + ) + + return issues + + @classmethod + def validate_metric_time_window( # noqa: D + cls, metric_context: MetricContext, window: MetricTimeWindow, custom_granularities: Sequence[str] + ) -> Sequence[ValidationIssue]: + issues: List[ValidationIssue] = [] + + valid_granularities = set(custom_granularities) | {item.value.lower() for item in TimeGranularity} + window_granularity = window.granularity + if window_granularity.endswith("s") and window_granularity[:-1] in valid_granularities: + # months -> month + window_granularity = window_granularity[:-1] + + if window_granularity not in valid_granularities: + issues.append( + ValidationError( + context=metric_context, + message=f"Invalid time granularity {window_granularity} in window: ({window})", + ) + ) return issues @@ -188,17 +196,24 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V @staticmethod @validate_safely(whats_being_done="checking that input metric time offset params are valid") - def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]: + def _validate_time_offset_params(metric: Metric, custom_granularities: Sequence[str]) -> List[ValidationIssue]: issues: List[ValidationIssue] = [] + metric_context = MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ) for input_metric in metric.type_params.metrics or []: + if input_metric.offset_window: + issues += CumulativeMetricRule.validate_metric_time_window( + metric_context=metric_context, + window=input_metric.offset_window, + custom_granularities=custom_granularities, + ) if input_metric.offset_window and input_metric.offset_to_grain: issues.append( ValidationError( - context=MetricContext( - file_context=FileContext.from_metadata(metadata=metric.metadata), - metric=MetricModelReference(metric_name=metric.name), - ), + context=metric_context, message=f"Both offset_window and offset_to_grain set for derived metric '{metric.name}' on " f"input metric '{input_metric.name}'. Please set one or the other.", ) @@ -247,10 +262,18 @@ def _validate_expr(metric: Metric) -> List[ValidationIssue]: def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] + custom_granularity_names = [ + granularity.name + for time_spine in semantic_manifest.project_configuration.time_spines + for granularity in time_spine.custom_granularities + ] + issues += DerivedMetricRule._validate_input_metrics_exist(semantic_manifest=semantic_manifest) for metric in semantic_manifest.metrics or []: issues += DerivedMetricRule._validate_alias_collision(metric=metric) - issues += DerivedMetricRule._validate_time_offset_params(metric=metric) + issues += DerivedMetricRule._validate_time_offset_params( + metric=metric, custom_granularities=custom_granularity_names + ) issues += DerivedMetricRule._validate_expr(metric=metric) return issues @@ -267,20 +290,15 @@ def _validate_type_params( window = conversion_type_params.window if window: - try: - window_str = f"{window.count} {window.granularity}" - PydanticMetricTimeWindow.parse(window=window_str, custom_granularity_names=custom_granularity_names) - except ParsingException as e: - issues.append( - ValidationError( - context=MetricContext( - file_context=FileContext.from_metadata(metadata=metric.metadata), - metric=MetricModelReference(metric_name=metric.name), - ), - message="".join(traceback.format_exception_only(type(e), value=e)), - extra_detail="".join(traceback.format_tb(e.__traceback__)), - ) - ) + issues += CumulativeMetricRule.validate_metric_time_window( + metric_context=MetricContext( + file_context=FileContext.from_metadata(metadata=metric.metadata), + metric=MetricModelReference(metric_name=metric.name), + ), + window=window, + custom_granularities=custom_granularity_names, + ) + return issues @staticmethod diff --git a/tests/parsing/test_metric_parsing.py b/tests/parsing/test_metric_parsing.py index ad120fb0..cb307ff6 100644 --- a/tests/parsing/test_metric_parsing.py +++ b/tests/parsing/test_metric_parsing.py @@ -11,11 +11,9 @@ ) from dbt_semantic_interfaces.parsing.dir_to_model import ( parse_yaml_files_to_semantic_manifest, + parse_yaml_files_to_validation_ready_semantic_manifest, ) from dbt_semantic_interfaces.parsing.objects import YamlConfigFile -from dbt_semantic_interfaces.transformations.semantic_manifest_transformer import ( - PydanticSemanticManifestTransformer, -) from dbt_semantic_interfaces.type_enums import ( ConversionCalculationType, MetricType, @@ -220,7 +218,9 @@ def test_cumulative_window_old_metric_parsing() -> None: ) file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) - build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) + build_result = parse_yaml_files_to_validation_ready_semantic_manifest( + [file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE] + ) assert len(build_result.semantic_manifest.metrics) == 1 metric = build_result.semantic_manifest.metrics[0] @@ -246,7 +246,9 @@ def test_cumulative_window_metric_parsing() -> None: ) file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) - build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) + build_result = parse_yaml_files_to_validation_ready_semantic_manifest( + [file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE] + ) assert len(build_result.semantic_manifest.metrics) == 1 metric = build_result.semantic_manifest.metrics[0] @@ -318,6 +320,29 @@ def test_derived_metric_offset_window_parsing() -> None: """Test for parsing a derived metric with an offset window.""" yaml_contents = textwrap.dedent( """\ + semantic_model: + name: sample_semantic_model + node_relation: + schema_name: some_schema + alias: source_table + defaults: + agg_time_dimension: ds + entities: + - name: example_entity + type: primary + role: test_role + expr: example_id + measures: + - name: bookings + agg: sum + expr: 1 + create_metric: true + dimensions: + - name: ds + type: time + type_params: + time_granularity: day + --- metric: name: derived_offset_test type: derived @@ -332,10 +357,12 @@ def test_derived_metric_offset_window_parsing() -> None: ) file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) - build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) + build_result = parse_yaml_files_to_validation_ready_semantic_manifest( + [file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE] + ) assert len(build_result.issues.all_issues) == 0 - assert len(build_result.semantic_manifest.metrics) == 1 + assert len(build_result.semantic_manifest.metrics) == 2 metric = build_result.semantic_manifest.metrics[0] assert metric.name == "derived_offset_test" assert metric.type is MetricType.DERIVED @@ -389,13 +416,12 @@ def test_derived_metric_offset_to_grain_parsing() -> None: ) file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) - build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) - - # Apply transformations to ensure lowercasing - manifest = PydanticSemanticManifestTransformer().transform(build_result.semantic_manifest) + build_result = parse_yaml_files_to_validation_ready_semantic_manifest( + [file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE] + ) - assert len(manifest.metrics) == 2 - metric = [m for m in manifest.metrics if m.name == "derived_offset_to_grain_test"][0] + assert len(build_result.semantic_manifest.metrics) == 2 + metric = [m for m in build_result.semantic_manifest.metrics if m.name == "derived_offset_to_grain_test"][0] assert metric assert metric.type is MetricType.DERIVED assert metric.type_params.metrics and len(metric.type_params.metrics) == 2 @@ -522,7 +548,9 @@ def test_conversion_metric_parsing() -> None: ) file = YamlConfigFile(filepath="inline_for_test", contents=yaml_contents) - build_result = parse_yaml_files_to_semantic_manifest(files=[file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE]) + build_result = parse_yaml_files_to_validation_ready_semantic_manifest( + [file, EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE] + ) assert len(build_result.semantic_manifest.metrics) == 1 metric = build_result.semantic_manifest.metrics[0] diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index 8571373b..4955fb79 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -304,7 +304,7 @@ def test_derived_metric() -> None: # noqa: D metrics=[ PydanticMetricInput( name="random_metric", - offset_window=PydanticMetricTimeWindow.parse("3 weeks", custom_granularity_names=()), + offset_window=PydanticMetricTimeWindow.parse("3 weekies"), ), PydanticMetricInput( name="random_metric", @@ -322,7 +322,7 @@ def test_derived_metric() -> None: # noqa: D metrics=[ PydanticMetricInput( name="random_metric", - offset_window=PydanticMetricTimeWindow.parse("3 weeks", custom_granularity_names=()), + offset_window=PydanticMetricTimeWindow.parse("3 weeks"), offset_to_grain=TimeGranularity.MONTH.value, ) ], @@ -333,7 +333,7 @@ def test_derived_metric() -> None: # noqa: D ) ) build_issues = validation_results.all_issues - assert len(build_issues) == 6 + assert len(build_issues) == 7 expected_substrings = [ "is already being used. Please choose another alias", "does not exist as a configured metric in the model", @@ -341,6 +341,7 @@ def test_derived_metric() -> None: # noqa: D "is not used in `expr`", "No input metrics found for derived metric", "No `expr` set for derived metric", + "Invalid time granularity", ] check_error_in_issues(error_substrings=expected_substrings, issues=build_issues) @@ -351,7 +352,7 @@ def test_conversion_metrics() -> None: # noqa: D entity = "entity" invalid_entity = "bad" invalid_measure = "invalid_measure" - window = PydanticMetricTimeWindow.parse("7 days", custom_granularity_names=()) + window = PydanticMetricTimeWindow.parse("7 days") validator = SemanticManifestValidator[PydanticSemanticManifest]([ConversionMetricRule()]) result = validator.validate_semantic_manifest( PydanticSemanticManifest( @@ -478,7 +479,7 @@ def test_conversion_metrics() -> None: # noqa: D conversion_measure=PydanticMetricInputMeasure( name=conversion_measure_name, ), - window=PydanticMetricTimeWindow.parse("7 moons", custom_granularity_names=(), strict=False), + window=PydanticMetricTimeWindow.parse("7 moons"), entity=entity, ) ), @@ -492,9 +493,7 @@ def test_conversion_metrics() -> None: # noqa: D conversion_measure=PydanticMetricInputMeasure( name=conversion_measure_name, ), - window=PydanticMetricTimeWindow.parse( - "7 martian_week", custom_granularity_names=("martian_week",) - ), + window=PydanticMetricTimeWindow.parse("7 martian_week"), entity=entity, ) ), @@ -508,9 +507,7 @@ def test_conversion_metrics() -> None: # noqa: D conversion_measure=PydanticMetricInputMeasure( name=conversion_measure_name, ), - window=PydanticMetricTimeWindow.parse( - "7 martian_weeks", custom_granularity_names=("martian_week",) - ), + window=PydanticMetricTimeWindow.parse("7 martian_weeks"), entity=entity, ) ), @@ -640,9 +637,7 @@ def test_cumulative_metrics() -> None: # noqa: D type_params=PydanticMetricTypeParams( measure=PydanticMetricInputMeasure(name=measure_name), cumulative_type_params=PydanticCumulativeTypeParams( - window=PydanticMetricTimeWindow.parse( - window="3 moons", custom_granularity_names=(), strict=False - ), + window=PydanticMetricTimeWindow.parse(window="3 moons"), ), ), ), @@ -652,9 +647,7 @@ def test_cumulative_metrics() -> None: # noqa: D type_params=PydanticMetricTypeParams( measure=PydanticMetricInputMeasure(name=measure_name), cumulative_type_params=PydanticCumulativeTypeParams( - window=PydanticMetricTimeWindow.parse( - window="3 martian_week", custom_granularity_names=("martian_week",), strict=False - ), + window=PydanticMetricTimeWindow.parse(window="3 martian_week"), ), ), ), @@ -664,9 +657,7 @@ def test_cumulative_metrics() -> None: # noqa: D type_params=PydanticMetricTypeParams( measure=PydanticMetricInputMeasure(name=measure_name), cumulative_type_params=PydanticCumulativeTypeParams( - window=PydanticMetricTimeWindow.parse( - window="3 martian_weeks", custom_granularity_names=("martian_week",), strict=False - ), + window=PydanticMetricTimeWindow.parse(window="3 martian_weeks"), ), ), ), From 825622c4914bc04651c37678de66f44e9b1dcafe Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Tue, 26 Nov 2024 14:59:07 -0800 Subject: [PATCH 5/8] Add validations to temporarily block custom grain in all fields except offset_window We decided to cut scope on custom calendar in the meantime and only enable it for offset_windows. These validations will make that an easy user experience, while still allowing us to easily remove them once we build support. --- .../validations/metrics.py | 47 +++++++++++++- tests/validations/test_metrics.py | 61 +++++++++++++++++-- 2 files changed, 100 insertions(+), 8 deletions(-) diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index c069f742..57d7c7d4 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -38,6 +38,8 @@ WhereFiltersAreParseable, # noQa ) +TEMP_CUSTOM_GRAIN_MSG = "Custom granularities are not supported for this field yet." + class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]): """Checks that cumulative metrics are configured properly.""" @@ -52,6 +54,7 @@ def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[Val for time_spine in semantic_manifest.project_configuration.time_spines for granularity in time_spine.custom_granularities ] + standard_granularities = {item.value.lower() for item in TimeGranularity} for metric in semantic_manifest.metrics or []: if metric.type != MetricType.CUMULATIVE: @@ -96,6 +99,18 @@ def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[Val grain_to_date = metric.type_params.grain_to_date.value if metric.type_params.grain_to_date else None if metric.type_params.cumulative_type_params and metric.type_params.cumulative_type_params.grain_to_date: grain_to_date = metric.type_params.cumulative_type_params.grain_to_date + + if grain_to_date and grain_to_date not in standard_granularities: + issues.append( + ValidationError( + context=metric_context, + message=( + f"Invalid time granularity found in `grain_to_date`: '{grain_to_date}'. " + f"{TEMP_CUSTOM_GRAIN_MSG}" + ), + ) + ) + if window and grain_to_date: issues.append( ValidationError( @@ -113,21 +128,34 @@ def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[Val @classmethod def validate_metric_time_window( # noqa: D - cls, metric_context: MetricContext, window: MetricTimeWindow, custom_granularities: Sequence[str] + cls, + metric_context: MetricContext, + window: MetricTimeWindow, + custom_granularities: Sequence[str], + allow_custom: bool = False, ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] - valid_granularities = set(custom_granularities) | {item.value.lower() for item in TimeGranularity} + standard_granularities = {item.value.lower() for item in TimeGranularity} + valid_granularities = set(custom_granularities) | standard_granularities window_granularity = window.granularity if window_granularity.endswith("s") and window_granularity[:-1] in valid_granularities: # months -> month window_granularity = window_granularity[:-1] + msg = f"Invalid time granularity '{window_granularity}' in window: '{window.window_string}'" if window_granularity not in valid_granularities: issues.append( ValidationError( context=metric_context, - message=f"Invalid time granularity {window_granularity} in window: ({window})", + message=msg, + ) + ) + elif not allow_custom and (window_granularity not in standard_granularities): + issues.append( + ValidationError( + context=metric_context, + message=msg + " " + TEMP_CUSTOM_GRAIN_MSG, ) ) @@ -199,6 +227,8 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V def _validate_time_offset_params(metric: Metric, custom_granularities: Sequence[str]) -> List[ValidationIssue]: issues: List[ValidationIssue] = [] + standard_granularities = {item.value.lower() for item in TimeGranularity} + metric_context = MetricContext( file_context=FileContext.from_metadata(metadata=metric.metadata), metric=MetricModelReference(metric_name=metric.name), @@ -209,6 +239,17 @@ def _validate_time_offset_params(metric: Metric, custom_granularities: Sequence[ metric_context=metric_context, window=input_metric.offset_window, custom_granularities=custom_granularities, + allow_custom=True, + ) + if input_metric.offset_to_grain and input_metric.offset_to_grain not in standard_granularities: + issues.append( + ValidationError( + context=metric_context, + message=( + f"Invalid time granularity found in `offset_to_grain`: '{input_metric.offset_to_grain}'. " + f"{TEMP_CUSTOM_GRAIN_MSG}" + ), + ) ) if input_metric.offset_window and input_metric.offset_to_grain: issues.append( diff --git a/tests/validations/test_metrics.py b/tests/validations/test_metrics.py index 4955fb79..4794bd89 100644 --- a/tests/validations/test_metrics.py +++ b/tests/validations/test_metrics.py @@ -328,12 +328,38 @@ def test_derived_metric() -> None: # noqa: D ], ), ), + metric_with_guaranteed_meta( + name="has_custom_grain_offset_window", # this is valid + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams( + expr="random_metric * 2", + metrics=[ + PydanticMetricInput( + name="random_metric", + offset_window=PydanticMetricTimeWindow.parse("3 martian_weeks"), + ) + ], + ), + ), + metric_with_guaranteed_meta( + name="has_custom_offset_to_grain", + type=MetricType.DERIVED, + type_params=PydanticMetricTypeParams( + expr="random_metric * 2", + metrics=[ + PydanticMetricInput( + name="random_metric", + offset_to_grain="martian_week", + ) + ], + ), + ), ], project_configuration=EXAMPLE_PROJECT_CONFIGURATION, ) ) build_issues = validation_results.all_issues - assert len(build_issues) == 7 + assert len(build_issues) == 8 expected_substrings = [ "is already being used. Please choose another alias", "does not exist as a configured metric in the model", @@ -341,7 +367,9 @@ def test_derived_metric() -> None: # noqa: D "is not used in `expr`", "No input metrics found for derived metric", "No `expr` set for derived metric", - "Invalid time granularity", + "Invalid time granularity 'weekies' in window: '3 weekies'", + "Custom granularities are not supported", + "Invalid time granularity found in `offset_to_grain`: 'martian_week'", ] check_error_in_issues(error_substrings=expected_substrings, issues=build_issues) @@ -518,7 +546,7 @@ def test_conversion_metrics() -> None: # noqa: D ) build_issues = result.all_issues - assert len(result.errors) == 6 + assert len(result.errors) == 8 assert len(result.warnings) == 1 expected_substrings = [ @@ -528,7 +556,9 @@ def test_conversion_metrics() -> None: # noqa: D "The provided constant property: bad_dim, cannot be found", "The provided constant property: bad_dim2, cannot be found", "filtering on a conversion input measure is not fully supported yet", - "Invalid time granularity", + "Invalid time granularity 'moons' in window", + "Invalid time granularity 'martian_week' in window: '7 martian_weeks'", + "Invalid time granularity 'martian_week' in window: '7 martian_week'", ] check_error_in_issues(error_substrings=expected_substrings, issues=build_issues) @@ -661,17 +691,38 @@ def test_cumulative_metrics() -> None: # noqa: D ), ), ), + metric_with_guaranteed_meta( + name="custom_grain_to_date", + type=MetricType.CUMULATIVE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name=measure_name), + cumulative_type_params=PydanticCumulativeTypeParams( + grain_to_date="3 martian_weeks", + ), + ), + ), + metric_with_guaranteed_meta( + name="custom_window_old", + type=MetricType.CUMULATIVE, + type_params=PydanticMetricTypeParams( + measure=PydanticMetricInputMeasure(name=measure_name), + window=PydanticMetricTimeWindow.parse(window="5 martian_week"), + ), + ), ], project_configuration=EXAMPLE_PROJECT_CONFIGURATION, ) ) build_issues = validation_results.all_issues - assert len(build_issues) == 3 + assert len(build_issues) == 7 expected_substrings = [ "Invalid time granularity", "Both window and grain_to_date set for cumulative metric. Please set one or the other.", "Got differing values for `window`", + "Invalid time granularity 'martian_week' in window: '3 martian_weeks'", + "Invalid time granularity 'martian_week' in window: '3 martian_week'", + "Invalid time granularity 'martian_week' in window: '5 martian_week'", ] check_error_in_issues(error_substrings=expected_substrings, issues=build_issues) From e0fa2b1257c70a1df398b4d0a15e3b55eadf750f Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 2 Dec 2024 16:18:05 -0500 Subject: [PATCH 6/8] Use set for custom grain names (optimization) --- .../remove_plural_from_window_granularity.py | 8 +++---- .../validations/metrics.py | 24 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py b/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py index de281823..cc21a6e9 100644 --- a/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py +++ b/dbt_semantic_interfaces/transformations/remove_plural_from_window_granularity.py @@ -1,4 +1,4 @@ -from typing import Sequence +from typing import Set from typing_extensions import override @@ -30,7 +30,7 @@ def _implements_protocol(self) -> SemanticManifestTransformRule[PydanticSemantic @staticmethod def _update_metric( - semantic_manifest: PydanticSemanticManifest, metric_name: str, custom_granularity_names: Sequence[str] + semantic_manifest: PydanticSemanticManifest, metric_name: str, custom_granularity_names: Set[str] ) -> None: """Mutates all the MetricTimeWindow by reparsing to remove the trailing 's'.""" valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set( @@ -81,11 +81,11 @@ def trim_trailing_s(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindo @staticmethod def transform_model(semantic_manifest: PydanticSemanticManifest) -> PydanticSemanticManifest: # noqa: D - custom_granularity_names = [ + custom_granularity_names = { granularity.name for time_spine in semantic_manifest.project_configuration.time_spines for granularity in time_spine.custom_granularities - ] + } for metric in semantic_manifest.metrics: RemovePluralFromWindowGranularityRule._update_metric( diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index 57d7c7d4..b1078b7f 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -1,4 +1,4 @@ -from typing import Dict, Generic, List, Optional, Sequence +from typing import Dict, Generic, List, Optional, Sequence, Set from dbt_semantic_interfaces.implementations.metric import PydanticMetric from dbt_semantic_interfaces.protocols import ( @@ -49,11 +49,11 @@ class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Ge def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] - custom_granularity_names = [ + custom_granularity_names = { granularity.name for time_spine in semantic_manifest.project_configuration.time_spines for granularity in time_spine.custom_granularities - ] + } standard_granularities = {item.value.lower() for item in TimeGranularity} for metric in semantic_manifest.metrics or []: @@ -131,13 +131,13 @@ def validate_metric_time_window( # noqa: D cls, metric_context: MetricContext, window: MetricTimeWindow, - custom_granularities: Sequence[str], + custom_granularities: Set[str], allow_custom: bool = False, ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] standard_granularities = {item.value.lower() for item in TimeGranularity} - valid_granularities = set(custom_granularities) | standard_granularities + valid_granularities = custom_granularities | standard_granularities window_granularity = window.granularity if window_granularity.endswith("s") and window_granularity[:-1] in valid_granularities: # months -> month @@ -224,7 +224,7 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V @staticmethod @validate_safely(whats_being_done="checking that input metric time offset params are valid") - def _validate_time_offset_params(metric: Metric, custom_granularities: Sequence[str]) -> List[ValidationIssue]: + def _validate_time_offset_params(metric: Metric, custom_granularities: Set[str]) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] standard_granularities = {item.value.lower() for item in TimeGranularity} @@ -303,11 +303,11 @@ def _validate_expr(metric: Metric) -> List[ValidationIssue]: def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] - custom_granularity_names = [ + custom_granularity_names = { granularity.name for time_spine in semantic_manifest.project_configuration.time_spines for granularity in time_spine.custom_granularities - ] + } issues += DerivedMetricRule._validate_input_metrics_exist(semantic_manifest=semantic_manifest) for metric in semantic_manifest.metrics or []: @@ -325,8 +325,8 @@ class ConversionMetricRule(SemanticManifestValidationRule[SemanticManifestT], Ge @staticmethod @validate_safely(whats_being_done="checking that the params of metric are valid if it is a conversion metric") def _validate_type_params( - metric: Metric, conversion_type_params: ConversionTypeParams, custom_granularity_names: Sequence[str] - ) -> List[ValidationIssue]: + metric: Metric, conversion_type_params: ConversionTypeParams, custom_granularity_names: Set[str] + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] window = conversion_type_params.window @@ -490,11 +490,11 @@ def _get_semantic_model_from_measure( def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] - custom_granularity_names = [ + custom_granularity_names = { granularity.name for time_spine in semantic_manifest.project_configuration.time_spines for granularity in time_spine.custom_granularities - ] + } for metric in semantic_manifest.metrics or []: if metric.type == MetricType.CONVERSION: From 99c6043a0ef66bc7ba09e01f66b6afd9527f028d Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 2 Dec 2024 16:20:22 -0500 Subject: [PATCH 7/8] Retain type-checking for decorated functions In the process of writing the prior commit, I discovered that type checking was being erased for function parameters if this decorator was used. This commit fixes that. --- .../validations/validator_helpers.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/dbt_semantic_interfaces/validations/validator_helpers.py b/dbt_semantic_interfaces/validations/validator_helpers.py index a94b63eb..d1d60036 100644 --- a/dbt_semantic_interfaces/validations/validator_helpers.py +++ b/dbt_semantic_interfaces/validations/validator_helpers.py @@ -7,7 +7,6 @@ from datetime import date from enum import Enum from typing import ( - Any, Callable, Dict, Generic, @@ -20,6 +19,7 @@ ) import click +from typing_extensions import ParamSpec from dbt_semantic_interfaces.implementations.base import FrozenBaseModel from dbt_semantic_interfaces.protocols import Metadata, SemanticManifestT, SemanticModel @@ -35,6 +35,8 @@ ValidationContextJSON = Dict[str, Union[str, int, None]] ValidationIssueJSON = Dict[str, Union[str, int, ValidationContextJSON]] +P = ParamSpec("P") + class ValidationIssueLevel(Enum): """Categorize the issues found while validating a semantic manifest.""" @@ -384,18 +386,21 @@ def generate_exception_issue( ) -def _func_args_to_string(*args: Any, **kwargs: Any) -> str: # type: ignore +def _func_args_to_string(*args: P.args, **kwargs: P.kwargs) -> str: # type: ignore return f"positional args: {args}, key word args: {kwargs}" -def validate_safely(whats_being_done: str) -> Callable: +def validate_safely( + whats_being_done: str, +) -> Callable[[Callable[P, Sequence[ValidationIssue]]], Callable[P, Sequence[ValidationIssue]]]: """Decorator to safely run validation checks.""" - def decorator_check_element_safely(func: Callable) -> Callable: # noqa + def decorator_check_element_safely( + func: Callable[P, Sequence[ValidationIssue]] + ) -> Callable[P, Sequence[ValidationIssue]]: @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> List[ValidationIssue]: # type: ignore + def wrapper(*args: P.args, **kwargs: P.kwargs) -> Sequence[ValidationIssue]: # type: ignore """Safely run a check on model elements.""" - issues: List[ValidationIssue] try: issues = func(*args, **kwargs) except Exception as e: From cbdef9512068602aa196fd9368f25eb24fda25a0 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 2 Dec 2024 16:21:00 -0500 Subject: [PATCH 8/8] The last commit added type checking that had been disabled, which then revealed some type errors. This commit fixes those revealed type errors. --- .../validations/agg_time_dimension.py | 2 +- .../validations/common_entities.py | 14 +++++++----- .../validations/dimension_const.py | 4 ++-- .../validations/element_const.py | 7 ++---- .../validations/entities.py | 2 +- .../validations/measures.py | 4 ++-- .../validations/metrics.py | 18 ++++++++------- .../validations/non_empty.py | 14 +++++++----- .../validations/reserved_keywords.py | 8 +++---- .../validations/semantic_models.py | 4 ++-- .../validations/unique_valid_name.py | 22 ++++++++++--------- tests/validations/test_validator_helpers.py | 6 ++--- 12 files changed, 56 insertions(+), 49 deletions(-) diff --git a/dbt_semantic_interfaces/validations/agg_time_dimension.py b/dbt_semantic_interfaces/validations/agg_time_dimension.py index 0d21a55b..41a010ce 100644 --- a/dbt_semantic_interfaces/validations/agg_time_dimension.py +++ b/dbt_semantic_interfaces/validations/agg_time_dimension.py @@ -28,7 +28,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati @staticmethod @validate_safely(whats_being_done="checking aggregation time dimension for a semantic model") - def _validate_semantic_model(semantic_model: SemanticModel) -> List[ValidationIssue]: + def _validate_semantic_model(semantic_model: SemanticModel) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] for measure in semantic_model.measures: diff --git a/dbt_semantic_interfaces/validations/common_entities.py b/dbt_semantic_interfaces/validations/common_entities.py index d6d4d993..4cb8830f 100644 --- a/dbt_semantic_interfaces/validations/common_entities.py +++ b/dbt_semantic_interfaces/validations/common_entities.py @@ -37,7 +37,7 @@ def _check_entity( entity: Entity, semantic_model: SemanticModel, entities_to_semantic_models: Dict[EntityReference, Set[str]], - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] # If the entity is the dict and if the set of semantic models minus this semantic model is empty, # then we warn the user that their entity will be unused in joins @@ -65,15 +65,17 @@ def _check_entity( @validate_safely(whats_being_done="running model validation warning if entities are only one one semantic model") def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: """Issues a warning for any entity that is associated with only one semantic_model.""" - issues = [] + issues: List[ValidationIssue] = [] entities_to_semantic_models = CommonEntitysRule._map_semantic_model_entities(semantic_manifest.semantic_models) for semantic_model in semantic_manifest.semantic_models or []: for entity in semantic_model.entities or []: - issues += CommonEntitysRule._check_entity( - entity=entity, - semantic_model=semantic_model, - entities_to_semantic_models=entities_to_semantic_models, + issues.extend( + CommonEntitysRule._check_entity( + entity=entity, + semantic_model=semantic_model, + entities_to_semantic_models=entities_to_semantic_models, + ) ) return issues diff --git a/dbt_semantic_interfaces/validations/dimension_const.py b/dbt_semantic_interfaces/validations/dimension_const.py index bd20f286..a584015a 100644 --- a/dbt_semantic_interfaces/validations/dimension_const.py +++ b/dbt_semantic_interfaces/validations/dimension_const.py @@ -58,7 +58,7 @@ def _validate_dimension( dimension: Dimension, time_dims_to_granularity: Dict[DimensionReference, TimeGranularity], semantic_model: SemanticModel, - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: """Check that time dimensions of the same name and aren't primary have the same time granularity. Args: @@ -104,7 +104,7 @@ def _validate_semantic_model( semantic_model: SemanticModel, dimension_to_invariant: Dict[DimensionReference, DimensionInvariants], update_invariant_dict: bool, - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: """Checks that the given semantic model has dimensions consistent with the given invariants. Args: diff --git a/dbt_semantic_interfaces/validations/element_const.py b/dbt_semantic_interfaces/validations/element_const.py index 3b222431..3e99c758 100644 --- a/dbt_semantic_interfaces/validations/element_const.py +++ b/dbt_semantic_interfaces/validations/element_const.py @@ -1,9 +1,6 @@ from collections import defaultdict from typing import DefaultDict, Generic, List, Sequence -from dbt_semantic_interfaces.implementations.semantic_manifest import ( - PydanticSemanticManifest, -) from dbt_semantic_interfaces.protocols import SemanticManifestT from dbt_semantic_interfaces.references import SemanticModelReference from dbt_semantic_interfaces.validations.validator_helpers import ( @@ -28,7 +25,7 @@ class ElementConsistencyRule(SemanticManifestValidationRule[SemanticManifestT], @staticmethod @validate_safely(whats_being_done="running model validation ensuring model wide element consistency") - def validate_manifest(semantic_manifest: PydanticSemanticManifest) -> Sequence[ValidationIssue]: # noqa: D + def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues = [] element_name_to_types = ElementConsistencyRule._get_element_name_to_types(semantic_manifest=semantic_manifest) invalid_elements = { @@ -54,7 +51,7 @@ def validate_manifest(semantic_manifest: PydanticSemanticManifest) -> Sequence[V @staticmethod def _get_element_name_to_types( - semantic_manifest: PydanticSemanticManifest, + semantic_manifest: SemanticManifestT, ) -> DefaultDict[str, DefaultDict[SemanticModelElementType, List[SemanticModelContext]]]: """Create a mapping of element names in the semantic manifest to types with a list of associated contexts.""" element_types: DefaultDict[ diff --git a/dbt_semantic_interfaces/validations/entities.py b/dbt_semantic_interfaces/validations/entities.py index 9d9755b4..47b55670 100644 --- a/dbt_semantic_interfaces/validations/entities.py +++ b/dbt_semantic_interfaces/validations/entities.py @@ -26,7 +26,7 @@ class NaturalEntityConfigurationRule(SemanticManifestValidationRule[SemanticMani "natural entities are used in the appropriate contexts" ) ) - def _validate_semantic_model_natural_entities(semantic_model: SemanticModel) -> List[ValidationIssue]: + def _validate_semantic_model_natural_entities(semantic_model: SemanticModel) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] context = SemanticModelContext( file_context=FileContext.from_metadata(metadata=semantic_model.metadata), diff --git a/dbt_semantic_interfaces/validations/measures.py b/dbt_semantic_interfaces/validations/measures.py index 94a72981..2e587d18 100644 --- a/dbt_semantic_interfaces/validations/measures.py +++ b/dbt_semantic_interfaces/validations/measures.py @@ -65,7 +65,7 @@ class MeasureConstraintAliasesRule(SemanticManifestValidationRule[SemanticManife @staticmethod @validate_safely(whats_being_done="ensuring measures aliases are set when required") - def _validate_required_aliases_are_set(metric: Metric, metric_context: MetricContext) -> List[ValidationIssue]: + def _validate_required_aliases_are_set(metric: Metric, metric_context: MetricContext) -> Sequence[ValidationIssue]: """Checks if valid aliases are set on the input measure references where they are required. Aliases are required whenever there are 2 or more input measures with the same measure @@ -188,7 +188,7 @@ class MetricMeasuresRule(SemanticManifestValidationRule[SemanticManifestT], Gene @staticmethod @validate_safely(whats_being_done="checking all measures referenced by the metric exist") - def _validate_metric_measure_references(metric: Metric, valid_measure_names: Set[str]) -> List[ValidationIssue]: + def _validate_metric_measure_references(metric: Metric, valid_measure_names: Set[str]) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] for measure_reference in metric.measure_references: diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index b1078b7f..a9058636 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -120,8 +120,10 @@ def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[Val ) if window: - issues += cls.validate_metric_time_window( - metric_context=metric_context, window=window, custom_granularities=custom_granularity_names + issues.extend( + cls.validate_metric_time_window( + metric_context=metric_context, window=window, custom_granularities=custom_granularity_names + ) ) return issues @@ -167,7 +169,7 @@ class DerivedMetricRule(SemanticManifestValidationRule[SemanticManifestT], Gener @staticmethod @validate_safely(whats_being_done="checking that the alias set are not unique and distinct") - def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]: + def _validate_alias_collision(metric: Metric) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] if metric.type == MetricType.DERIVED: @@ -193,7 +195,7 @@ def _validate_alias_collision(metric: Metric) -> List[ValidationIssue]: @staticmethod @validate_safely(whats_being_done="checking that the input metrics exist") - def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[ValidationIssue]: + def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] all_metrics = {m.name for m in semantic_manifest.metrics} @@ -264,7 +266,7 @@ def _validate_time_offset_params(metric: Metric, custom_granularities: Set[str]) @staticmethod @validate_safely(whats_being_done="checking that the expr field uses the input metrics") - def _validate_expr(metric: Metric) -> List[ValidationIssue]: + def _validate_expr(metric: Metric) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] if metric.type == MetricType.DERIVED: @@ -346,7 +348,7 @@ def _validate_type_params( @validate_safely(whats_being_done="checks that the entity exists in the base/conversion semantic model") def _validate_entity_exists( metric: Metric, entity: str, base_semantic_model: SemanticModel, conversion_semantic_model: SemanticModel - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] if entity not in {entity.name for entity in base_semantic_model.entities}: @@ -376,7 +378,7 @@ def _validate_entity_exists( @validate_safely(whats_being_done="checks that the provided measures are valid for conversion metrics") def _validate_measures( metric: Metric, base_semantic_model: SemanticModel, conversion_semantic_model: SemanticModel - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] def _validate_measure( @@ -438,7 +440,7 @@ def _validate_measure( @validate_safely(whats_being_done="checks that the provided constant properties are valid") def _validate_constant_properties( metric: Metric, base_semantic_model: SemanticModel, conversion_semantic_model: SemanticModel - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] def _elements_in_model(references: List[str], semantic_model: SemanticModel) -> None: diff --git a/dbt_semantic_interfaces/validations/non_empty.py b/dbt_semantic_interfaces/validations/non_empty.py index edce85cd..375f41d9 100644 --- a/dbt_semantic_interfaces/validations/non_empty.py +++ b/dbt_semantic_interfaces/validations/non_empty.py @@ -17,7 +17,7 @@ class NonEmptyRule(SemanticManifestValidationRule[SemanticManifestT], Generic[Se @staticmethod @validate_safely(whats_being_done="checking that the model has semantic models") - def _check_model_has_semantic_models(semantic_manifest: PydanticSemanticManifest) -> List[ValidationIssue]: + def _check_model_has_semantic_models(semantic_manifest: PydanticSemanticManifest) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] if not semantic_manifest.semantic_models: issues.append( @@ -29,7 +29,7 @@ def _check_model_has_semantic_models(semantic_manifest: PydanticSemanticManifest @staticmethod @validate_safely(whats_being_done="checking that the model has metrics") - def _check_model_has_metrics(semantic_manifest: PydanticSemanticManifest) -> List[ValidationIssue]: + def _check_model_has_metrics(semantic_manifest: PydanticSemanticManifest) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] # If we are going to generate measure proxy metrics that is sufficient as well @@ -50,8 +50,12 @@ def _check_model_has_metrics(semantic_manifest: PydanticSemanticManifest) -> Lis @staticmethod @validate_safely("running model validation rule ensuring metrics and semantic models are defined") - def validate_manifest(semantic_manifest: PydanticSemanticManifest) -> Sequence[ValidationIssue]: # noqa: D + def validate_manifest( # noqa: D + # PydanticSemanticManifest is required here due to a Measure.create_metric call downstream. + # TODO: can we add create_metric to the Measure protocol to avoid this type override? + semantic_manifest: PydanticSemanticManifest, # type: ignore[override] + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] - issues += NonEmptyRule._check_model_has_semantic_models(semantic_manifest=semantic_manifest) - issues += NonEmptyRule._check_model_has_metrics(semantic_manifest=semantic_manifest) + issues.extend(NonEmptyRule._check_model_has_semantic_models(semantic_manifest=semantic_manifest)) + issues.extend(NonEmptyRule._check_model_has_metrics(semantic_manifest=semantic_manifest)) return issues diff --git a/dbt_semantic_interfaces/validations/reserved_keywords.py b/dbt_semantic_interfaces/validations/reserved_keywords.py index ce9e2cb2..e78e0479 100644 --- a/dbt_semantic_interfaces/validations/reserved_keywords.py +++ b/dbt_semantic_interfaces/validations/reserved_keywords.py @@ -1,4 +1,4 @@ -from typing import Generic, List +from typing import Generic, List, Sequence from dbt_semantic_interfaces.protocols import ( SemanticManifest, @@ -67,7 +67,7 @@ class ReservedKeywordsRule(SemanticManifestValidationRule[SemanticManifestT], Ge @staticmethod @validate_safely(whats_being_done="checking that semantic model sub element names aren't reserved sql keywords") - def _validate_semantic_model_sub_elements(semantic_model: SemanticModel) -> List[ValidationIssue]: + def _validate_semantic_model_sub_elements(semantic_model: SemanticModel) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] for dimension in semantic_model.dimensions: @@ -125,7 +125,7 @@ def _validate_semantic_model_sub_elements(semantic_model: SemanticModel) -> List @classmethod @validate_safely(whats_being_done="checking that semantic_model node_relations are not sql reserved keywords") - def _validate_semantic_models(cls, semantic_manifest: SemanticManifest) -> List[ValidationIssue]: + def _validate_semantic_models(cls, semantic_manifest: SemanticManifest) -> Sequence[ValidationIssue]: """Checks names of objects that are not nested.""" issues: List[ValidationIssue] = [] set_keywords = set(RESERVED_KEYWORDS) @@ -156,5 +156,5 @@ def _validate_semantic_models(cls, semantic_manifest: SemanticManifest) -> List[ whats_being_done="running model validation ensuring elements that aren't selected via a defined expr don't " "contain reserved keywords" ) - def validate_manifest(cls, semantic_manifest: SemanticManifest) -> List[ValidationIssue]: # noqa: D + def validate_manifest(cls, semantic_manifest: SemanticManifest) -> Sequence[ValidationIssue]: # noqa: D return cls._validate_semantic_models(semantic_manifest=semantic_manifest) diff --git a/dbt_semantic_interfaces/validations/semantic_models.py b/dbt_semantic_interfaces/validations/semantic_models.py index 0241131d..1433b878 100644 --- a/dbt_semantic_interfaces/validations/semantic_models.py +++ b/dbt_semantic_interfaces/validations/semantic_models.py @@ -36,7 +36,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati whats_being_done="checking the semantic model's validity parameters for compatibility with " "runtime requirements" ) - def _validate_semantic_model(semantic_model: SemanticModel) -> List[ValidationIssue]: + def _validate_semantic_model(semantic_model: SemanticModel) -> Sequence[ValidationIssue]: """Runs assertions on semantic models with validity parameters set on one or more time dimensions.""" issues: List[ValidationIssue] = [] @@ -157,7 +157,7 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati @staticmethod @validate_safely(whats_being_done="checking validity of the semantic model's default agg_time_dimension") - def _validate_default_agg_time_dimension(semantic_model: SemanticModel) -> List[ValidationIssue]: + def _validate_default_agg_time_dimension(semantic_model: SemanticModel) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] if semantic_model.defaults is None or semantic_model.defaults.agg_time_dimension is None: diff --git a/dbt_semantic_interfaces/validations/unique_valid_name.py b/dbt_semantic_interfaces/validations/unique_valid_name.py index 203bd2d9..dbc70340 100644 --- a/dbt_semantic_interfaces/validations/unique_valid_name.py +++ b/dbt_semantic_interfaces/validations/unique_valid_name.py @@ -69,7 +69,9 @@ class UniqueAndValidNameRule(SemanticManifestValidationRule[SemanticManifestT], NAME_REGEX = re.compile(r"\A[a-z]((?!__)[a-z0-9_])*[a-z0-9]\Z") @staticmethod - def check_valid_name(name: str, context: Optional[ValidationContext] = None) -> List[ValidationIssue]: # noqa: D + def check_valid_name( # noqa: D + name: str, context: Optional[ValidationContext] = None + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] if not UniqueAndValidNameRule.NAME_REGEX.match(name): @@ -102,7 +104,9 @@ def check_valid_name(name: str, context: Optional[ValidationContext] = None) -> @staticmethod @validate_safely(whats_being_done="checking semantic model sub element names are unique") - def _validate_semantic_model_elements_and_time_spines(semantic_manifest: SemanticManifest) -> List[ValidationIssue]: + def _validate_semantic_model_elements_and_time_spines( + semantic_manifest: SemanticManifest, + ) -> Sequence[ValidationIssue]: issues: List[ValidationIssue] = [] custom_granularity_restricted_names_and_types: Dict[str, str] = {} @@ -219,9 +223,9 @@ def _validate_semantic_model_elements_and_time_spines(semantic_manifest: Semanti @staticmethod @validate_safely(whats_being_done="checking top level elements of a specific type have unique and valid names") def _validate_top_level_objects_of_type( - objects: Union[List[SemanticModel], List[Metric], List[SavedQuery]], + objects: Union[Sequence[SemanticModel], Sequence[Metric], Sequence[SavedQuery]], object_type: SemanticManifestNodeType, - ) -> List[ValidationIssue]: + ) -> Sequence[ValidationIssue]: """Validates uniqeness and validaty of top level objects of singular type.""" issues: List[ValidationIssue] = [] object_names = set() @@ -247,11 +251,9 @@ def _validate_top_level_objects_of_type( @staticmethod @validate_safely(whats_being_done="checking model top level element names are sufficiently unique") - def _validate_top_level_objects(semantic_manifest: SemanticManifest) -> List[ValidationIssue]: + def _validate_top_level_objects(semantic_manifest: SemanticManifest) -> Sequence[ValidationIssue]: """Checks names of objects that are not nested.""" - issues: List[ValidationIssue] = [] - - issues.extend( + issues = list( UniqueAndValidNameRule._validate_top_level_objects_of_type( semantic_manifest.semantic_models, SemanticManifestNodeType.SEMANTIC_MODEL ) @@ -274,7 +276,7 @@ def _validate_top_level_objects(semantic_manifest: SemanticManifest) -> List[Val @staticmethod @validate_safely(whats_being_done="running model validation ensuring elements have adequately unique names") def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D - issues = [] + issues: List[ValidationIssue] = [] issues += UniqueAndValidNameRule._validate_top_level_objects(semantic_manifest=semantic_manifest) issues += UniqueAndValidNameRule._validate_semantic_model_elements_and_time_spines(semantic_manifest) @@ -336,7 +338,7 @@ def _check_semantic_model( # noqa: D @staticmethod @validate_safely(whats_being_done="validating there are no duplicate dimension primary entity pairs") def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D - issues = [] + issues: List[ValidationIssue] = [] known_pairings: Dict[str, Dict[str, str]] = {} for semantic_model in semantic_manifest.semantic_models: issues += PrimaryEntityDimensionPairs._check_semantic_model( diff --git a/tests/validations/test_validator_helpers.py b/tests/validations/test_validator_helpers.py index 056ceb1a..8056d3b6 100644 --- a/tests/validations/test_validator_helpers.py +++ b/tests/validations/test_validator_helpers.py @@ -1,5 +1,5 @@ from datetime import date -from typing import List +from typing import List, Sequence import pytest @@ -25,7 +25,7 @@ @pytest.fixture -def list_of_issues() -> List[ValidationIssue]: # noqa: D +def list_of_issues() -> Sequence[ValidationIssue]: # noqa: D file_context = FileContext(file_name="foo", line_number=1337) semantic_model_name = "My semantic model" @@ -141,7 +141,7 @@ def test_merge_two_model_validation_results(list_of_issues: List[ValidationIssue def test_validate_safely_handles_exceptions(): # noqa: D @validate_safely("testing validate safely handles exceptions gracefully") - def checking_validate_safely() -> List[ValidationIssue]: + def checking_validate_safely() -> Sequence[ValidationIssue]: raise (Exception("Oh no an exception!")) return []