From c82679c65e68ef5f84b429372918415f17447b17 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Fri, 26 Jul 2024 11:39:09 -0700 Subject: [PATCH] Add validations for time spines --- dbt_semantic_interfaces/protocols/__init__.py | 4 + .../validations/metrics.py | 4 +- .../semantic_manifest_validator.py | 12 +- .../validations/time_spines.py | 116 ++++++++++ tests/validations/test_time_spines.py | 211 ++++++++++++++++++ 5 files changed, 340 insertions(+), 7 deletions(-) create mode 100644 dbt_semantic_interfaces/validations/time_spines.py create mode 100644 tests/validations/test_time_spines.py diff --git a/dbt_semantic_interfaces/protocols/__init__.py b/dbt_semantic_interfaces/protocols/__init__.py index c77bb36b..76414058 100644 --- a/dbt_semantic_interfaces/protocols/__init__.py +++ b/dbt_semantic_interfaces/protocols/__init__.py @@ -32,6 +32,10 @@ SemanticModelDefaults, SemanticModelT, ) +from dbt_semantic_interfaces.protocols.time_spine import ( # noqa:F401 + TimeSpine, + TimeSpinePrimaryColumn, +) from dbt_semantic_interfaces.protocols.where_filter import ( # noqa:F401 WhereFilter, WhereFilterIntersection, diff --git a/dbt_semantic_interfaces/validations/metrics.py b/dbt_semantic_interfaces/validations/metrics.py index be9e57c0..01dfcdaf 100644 --- a/dbt_semantic_interfaces/validations/metrics.py +++ b/dbt_semantic_interfaces/validations/metrics.py @@ -39,10 +39,10 @@ class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]): - """Checks that cumulative sum metrics are configured properly.""" + """Checks that cumulative metrics are configured properly.""" @staticmethod - @validate_safely(whats_being_done="running model validation ensuring cumulative sum metrics are valid") + @validate_safely(whats_being_done="running model validation ensuring cumulative metrics are valid") def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D issues: List[ValidationIssue] = [] diff --git a/dbt_semantic_interfaces/validations/semantic_manifest_validator.py b/dbt_semantic_interfaces/validations/semantic_manifest_validator.py index 2093a9bc..26f81452 100644 --- a/dbt_semantic_interfaces/validations/semantic_manifest_validator.py +++ b/dbt_semantic_interfaces/validations/semantic_manifest_validator.py @@ -37,6 +37,7 @@ SemanticModelDefaultsRule, SemanticModelValidityWindowRule, ) +from dbt_semantic_interfaces.validations.time_spines import TimeSpineRule from dbt_semantic_interfaces.validations.unique_valid_name import ( PrimaryEntityDimensionPairs, UniqueAndValidNameRule, @@ -91,6 +92,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]): SemanticModelLabelsRule[SemanticManifestT](), EntityLabelsRule[SemanticManifestT](), ConversionMetricRule[SemanticManifestT](), + TimeSpineRule[SemanticManifestT](), ) def __init__( @@ -100,7 +102,7 @@ def __init__( Args: rules: List of validation rules to run. Defaults to DEFAULT_RULES - max_workers: sets the max number of rules to run against the model concurrently + max_workers: sets the max number of rules to run against the semantic_manifest concurrently """ # Raises an error if 'rules' is an empty sequence or None if not rules: @@ -147,7 +149,7 @@ def _validate_multi_process( # noqa: D def checked_validations(self, semantic_manifest: SemanticManifestT) -> None: """Similar to validate(), but throws an exception if validation fails.""" - model_copy = copy.deepcopy(semantic_manifest) - model_issues = self.validate_semantic_manifest(model_copy) - if model_issues.has_blocking_issues: - raise SemanticManifestValidationException(issues=tuple(model_issues.all_issues)) + semantic_manifest_copy = copy.deepcopy(semantic_manifest) + semantic_manifest_issues = self.validate_semantic_manifest(semantic_manifest_copy) + if semantic_manifest_issues.has_blocking_issues: + raise SemanticManifestValidationException(issues=tuple(semantic_manifest_issues.all_issues)) diff --git a/dbt_semantic_interfaces/validations/time_spines.py b/dbt_semantic_interfaces/validations/time_spines.py new file mode 100644 index 00000000..107ad65c --- /dev/null +++ b/dbt_semantic_interfaces/validations/time_spines.py @@ -0,0 +1,116 @@ +import traceback +from typing import Dict, Generic, List, Optional, Sequence, Set + +from dbt_semantic_interfaces.errors import ParsingException +from dbt_semantic_interfaces.implementations.metric import ( + PydanticMetric, + PydanticMetricTimeWindow, +) +from dbt_semantic_interfaces.protocols import ( + TimeSpine, + ConversionTypeParams, + Dimension, + Metric, + SemanticManifest, + SemanticManifestT, + SemanticModel, +) +from dbt_semantic_interfaces.references import ( + DimensionReference, + MeasureReference, + MetricModelReference, + MetricReference, +) +from dbt_semantic_interfaces.type_enums import ( + AggregationType, + MetricType, + TimeGranularity, +) +from dbt_semantic_interfaces.validations.unique_valid_name import UniqueAndValidNameRule +from dbt_semantic_interfaces.validations.validator_helpers import ( + FileContext, + MetricContext, + SemanticManifestValidationRule, + ValidationError, + ValidationIssue, + ValidationWarning, + generate_exception_issue, + validate_safely, +) + + +class TimeSpineRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]): + """Checks that time spines are configured properly.""" + + @staticmethod + @validate_safely(whats_being_done="running model validation to ensure that time spines are valid") + def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: + """Validate time spine configs. + + Note that some validation happens separately in the core parser before building this object: + - error if no time spine configured and legacy time spine model doeesn't exist + - error if granularity is missing for primary column + - error if primary column does not exist in the model + """ + issues: List[ValidationIssue] = [] + + if not semantic_manifest.semantic_models: + return issues + + time_spines = semantic_manifest.project_configuration.time_spines + if not time_spines: + # TODO: update docs link when new one is available! + docs_message = f"See documentation to configure: https://docs.getdbt.com/docs/build/metricflow-time-spine" + # If they have the old time spine configured and need to migrate + if semantic_manifest.project_configuration.time_spine_table_configurations: + issues.append( + ValidationWarning( + message=f"Time spines without YAML configuration are in the process of deprecation. Please add YAML " + "configuration for your 'metricflow_time_spine' model. " + docs_message + ) + ) + return issues + + # Verify that there is only one time spine per granularity + time_spines_by_granularity: Dict[TimeGranularity, List[TimeSpine]] = {} + granularities_with_multiple_time_spines: Set[TimeGranularity] = set() + for time_spine in time_spines: + granularity = time_spine.primary_column.time_granularity + if granularity in time_spines_by_granularity: + time_spines_by_granularity[granularity].append(time_spine) + else: + time_spines_by_granularity[granularity] = [time_spine] + if len(time_spines_by_granularity[granularity]) > 1: + granularities_with_multiple_time_spines.add(granularity) + + if granularities_with_multiple_time_spines: + duplicate_granularity_time_spines: Dict[str, str] = {} + for granularity in granularities_with_multiple_time_spines: + duplicate_granularity_time_spines[granularity.name] = [ + time_spine.node_relation.relation_name for time_spine in time_spines_by_granularity[granularity] + ] + issues.append( + ValidationWarning( + message=f"Only one time spine is supported per granularity. Got duplicates: {duplicate_granularity_time_spines}" + ) + ) + + # Warn if there is a time dimension configured with a smaller granularity than the smallest time spine granularity + dimension_granularities = { + dimension.type_params.time_granularity + for semantic_model in semantic_manifest.semantic_models + for dimension in semantic_model.dimensions + if dimension.type_params + } + smallest_dim_granularity = min(dimension_granularities) + smallest_time_spine_granularity = min(time_spines_by_granularity.keys()) + if smallest_dim_granularity < smallest_time_spine_granularity: + issues.append( + ValidationWarning( + message=f"To avoid unexpected query errors, configuring a time spine at or below the smallest time " + f"dimension granularity is recommended. Smallest time dimension granularity: " + f"{smallest_dim_granularity.name}; Smallest time spine granularity: {smallest_time_spine_granularity}" + ) + ) + + return issues diff --git a/tests/validations/test_time_spines.py b/tests/validations/test_time_spines.py new file mode 100644 index 00000000..7e321300 --- /dev/null +++ b/tests/validations/test_time_spines.py @@ -0,0 +1,211 @@ +from copy import deepcopy + +import pytest + +from dbt_semantic_interfaces.implementations.elements.dimension import ( + PydanticDimension, + PydanticDimensionTypeParams, +) +from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity +from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure +from dbt_semantic_interfaces.implementations.filters.where_filter import ( + PydanticWhereFilter, + PydanticWhereFilterIntersection, +) +from dbt_semantic_interfaces.implementations.time_spine import PydanticTimeSpine, PydanticTimeSpinePrimaryColumn +from dbt_semantic_interfaces.implementations.project_configuration import ( + PydanticProjectConfiguration, + PydanticTimeSpineTableConfiguration, +) +from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation +from dbt_semantic_interfaces.implementations.metric import ( + PydanticConstantPropertyInput, + PydanticConversionTypeParams, + PydanticCumulativeTypeParams, + PydanticMetricInput, + PydanticMetricInputMeasure, + PydanticMetricTimeWindow, + PydanticMetricTypeParams, +) +from dbt_semantic_interfaces.implementations.semantic_manifest import ( + PydanticSemanticManifest, +) +from dbt_semantic_interfaces.references import ( + DimensionReference, + EntityReference, + TimeDimensionReference, +) +from dbt_semantic_interfaces.test_utils import ( + find_metric_with, + metric_with_guaranteed_meta, + semantic_model_with_guaranteed_meta, +) +from dbt_semantic_interfaces.type_enums import ( + AggregationType, + DimensionType, + EntityType, + MetricType, + PeriodAggregation, + TimeGranularity, +) +from dbt_semantic_interfaces.validations.metrics import ( + ConversionMetricRule, + CumulativeMetricRule, + DerivedMetricRule, + MetricTimeGranularityRule, + WhereFiltersAreParseable, +) +from dbt_semantic_interfaces.validations.semantic_manifest_validator import ( + SemanticManifestValidator, +) +from dbt_semantic_interfaces.validations.validator_helpers import ( + SemanticManifestValidationException, +) +from tests.example_project_configuration import EXAMPLE_PROJECT_CONFIGURATION + + +def test_valid_time_spines() -> None: # noqa: D + semantic_manifest = PydanticSemanticManifest( + semantic_models=[ + semantic_model_with_guaranteed_meta( + name="sum_measure", + measures=[ + PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True) + ], + dimensions=[ + PydanticDimension( + name="dim", + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND), + ) + ], + entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)], + ), + ], + metrics=[], + project_configuration=PydanticProjectConfiguration( + time_spine_table_configurations=[], + time_spines=[ + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY), + ), + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ts", time_granularity=TimeGranularity.SECOND), + ), + ], + ), + ) + SemanticManifestValidator[PydanticSemanticManifest]().checked_validations(semantic_manifest) + + +def test_only_legacy_time_spine() -> None: # noqa: D + validator = SemanticManifestValidator[PydanticSemanticManifest]() + semantic_manifest = PydanticSemanticManifest( + semantic_models=[ + semantic_model_with_guaranteed_meta( + name="sum_measure", + measures=[ + PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True) + ], + dimensions=[ + PydanticDimension( + name="dim", + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND), + ) + ], + entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)], + ), + ], + metrics=[], + project_configuration=PydanticProjectConfiguration( + time_spine_table_configurations=[ + PydanticTimeSpineTableConfiguration(location="hurrr", column_name="fun_col", grain=TimeGranularity.DAY) + ] + ), + ) + issues = validator.validate_semantic_manifest(semantic_manifest) + assert not issues.has_blocking_issues + assert len(issues.warnings) == 1 + assert "Time spines without YAML configuration are in the process of deprecation." in issues.warnings[0].message + + +def test_duplicate_time_spine_granularity() -> None: # noqa: D + validator = SemanticManifestValidator[PydanticSemanticManifest]() + semantic_manifest = PydanticSemanticManifest( + semantic_models=[ + semantic_model_with_guaranteed_meta( + name="sum_measure", + measures=[ + PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True) + ], + dimensions=[ + PydanticDimension( + name="dim", + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND), + ) + ], + entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)], + ), + ], + metrics=[], + project_configuration=PydanticProjectConfiguration( + time_spine_table_configurations=[], + time_spines=[ + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.SECOND), + ), + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ts", time_granularity=TimeGranularity.SECOND), + ), + ], + ), + ) + issues = validator.validate_semantic_manifest(semantic_manifest) + assert not issues.has_blocking_issues + assert len(issues.warnings) == 1 + assert "Only one time spine is supported per granularity." in issues.warnings[0].message + + +def test_dimension_granularity_smaller_than_time_spine() -> None: # noqa: D + validator = SemanticManifestValidator[PydanticSemanticManifest]() + semantic_manifest = PydanticSemanticManifest( + semantic_models=[ + semantic_model_with_guaranteed_meta( + name="sum_measure", + measures=[ + PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True) + ], + dimensions=[ + PydanticDimension( + name="dim", + type=DimensionType.TIME, + type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND), + ) + ], + entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)], + ), + ], + metrics=[], + project_configuration=PydanticProjectConfiguration( + time_spine_table_configurations=[], + time_spines=[ + PydanticTimeSpine( + node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"), + primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY), + ), + ], + ), + ) + issues = validator.validate_semantic_manifest(semantic_manifest) + assert not issues.has_blocking_issues + assert len(issues.warnings) == 1 + assert ( + "configuring a time spine at or below the smallest time dimension granularity is recommended" + in issues.warnings[0].message + )