Skip to content

Commit

Permalink
Add validations for time spines
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Jul 26, 2024
1 parent 005493b commit c82679c
Show file tree
Hide file tree
Showing 5 changed files with 340 additions and 7 deletions.
4 changes: 4 additions & 0 deletions dbt_semantic_interfaces/protocols/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
SemanticModelDefaults,
SemanticModelT,
)
from dbt_semantic_interfaces.protocols.time_spine import ( # noqa:F401
TimeSpine,
TimeSpinePrimaryColumn,
)
from dbt_semantic_interfaces.protocols.where_filter import ( # noqa:F401
WhereFilter,
WhereFilterIntersection,
Expand Down
4 changes: 2 additions & 2 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@


class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that cumulative sum metrics are configured properly."""
"""Checks that cumulative metrics are configured properly."""

@staticmethod
@validate_safely(whats_being_done="running model validation ensuring cumulative sum metrics are valid")
@validate_safely(whats_being_done="running model validation ensuring cumulative metrics are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
SemanticModelDefaultsRule,
SemanticModelValidityWindowRule,
)
from dbt_semantic_interfaces.validations.time_spines import TimeSpineRule
from dbt_semantic_interfaces.validations.unique_valid_name import (
PrimaryEntityDimensionPairs,
UniqueAndValidNameRule,
Expand Down Expand Up @@ -91,6 +92,7 @@ class SemanticManifestValidator(Generic[SemanticManifestT]):
SemanticModelLabelsRule[SemanticManifestT](),
EntityLabelsRule[SemanticManifestT](),
ConversionMetricRule[SemanticManifestT](),
TimeSpineRule[SemanticManifestT](),
)

def __init__(
Expand All @@ -100,7 +102,7 @@ def __init__(
Args:
rules: List of validation rules to run. Defaults to DEFAULT_RULES
max_workers: sets the max number of rules to run against the model concurrently
max_workers: sets the max number of rules to run against the semantic_manifest concurrently
"""
# Raises an error if 'rules' is an empty sequence or None
if not rules:
Expand Down Expand Up @@ -147,7 +149,7 @@ def _validate_multi_process( # noqa: D

def checked_validations(self, semantic_manifest: SemanticManifestT) -> None:
"""Similar to validate(), but throws an exception if validation fails."""
model_copy = copy.deepcopy(semantic_manifest)
model_issues = self.validate_semantic_manifest(model_copy)
if model_issues.has_blocking_issues:
raise SemanticManifestValidationException(issues=tuple(model_issues.all_issues))
semantic_manifest_copy = copy.deepcopy(semantic_manifest)
semantic_manifest_issues = self.validate_semantic_manifest(semantic_manifest_copy)
if semantic_manifest_issues.has_blocking_issues:
raise SemanticManifestValidationException(issues=tuple(semantic_manifest_issues.all_issues))
116 changes: 116 additions & 0 deletions dbt_semantic_interfaces/validations/time_spines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import traceback
from typing import Dict, Generic, List, Optional, Sequence, Set

from dbt_semantic_interfaces.errors import ParsingException
from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
PydanticMetricTimeWindow,
)
from dbt_semantic_interfaces.protocols import (
TimeSpine,
ConversionTypeParams,
Dimension,
Metric,
SemanticManifest,
SemanticManifestT,
SemanticModel,
)
from dbt_semantic_interfaces.references import (
DimensionReference,
MeasureReference,
MetricModelReference,
MetricReference,
)
from dbt_semantic_interfaces.type_enums import (
AggregationType,
MetricType,
TimeGranularity,
)
from dbt_semantic_interfaces.validations.unique_valid_name import UniqueAndValidNameRule
from dbt_semantic_interfaces.validations.validator_helpers import (
FileContext,
MetricContext,
SemanticManifestValidationRule,
ValidationError,
ValidationIssue,
ValidationWarning,
generate_exception_issue,
validate_safely,
)


class TimeSpineRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
    """Checks that time spines are configured properly."""

    @staticmethod
    @validate_safely(whats_being_done="running model validation to ensure that time spines are valid")
    def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]:
        """Validate time spine configs.

        Note that some validation happens separately in the core parser before building this object:
        - error if no time spine configured and legacy time spine model doesn't exist
        - error if granularity is missing for primary column
        - error if primary column does not exist in the model

        Args:
            semantic_manifest: the manifest whose `project_configuration.time_spines` are checked against its
                semantic models' dimensions.

        Returns:
            The issues found. Every issue produced by this rule is a `ValidationWarning`:
            - only a legacy time spine table configuration is present (no YAML-configured time spines)
            - more than one time spine is configured for the same granularity
            - the smallest dimension granularity is smaller than the smallest time spine granularity
        """
        issues: List[ValidationIssue] = []

        # Without semantic models there is nothing that could query a time spine.
        if not semantic_manifest.semantic_models:
            return issues

        time_spines = semantic_manifest.project_configuration.time_spines
        if not time_spines:
            # TODO: update docs link when new one is available!
            docs_message = "See documentation to configure: https://docs.getdbt.com/docs/build/metricflow-time-spine"
            # If they have the old time spine configured and need to migrate
            if semantic_manifest.project_configuration.time_spine_table_configurations:
                issues.append(
                    ValidationWarning(
                        message="Time spines without YAML configuration are in the process of deprecation. Please add YAML "
                        "configuration for your 'metricflow_time_spine' model. " + docs_message
                    )
                )
            # The remaining checks all need at least one configured time spine.
            return issues

        # Verify that there is only one time spine per granularity
        time_spines_by_granularity: Dict[TimeGranularity, List[TimeSpine]] = {}
        for time_spine in time_spines:
            time_spines_by_granularity.setdefault(time_spine.primary_column.time_granularity, []).append(time_spine)

        # Built from the insertion-ordered dict (rather than a set) so the reported order follows the
        # order in which the time spines were configured.
        duplicate_granularity_time_spines: Dict[str, List[str]] = {
            granularity.name: [spine.node_relation.relation_name for spine in spines]
            for granularity, spines in time_spines_by_granularity.items()
            if len(spines) > 1
        }
        if duplicate_granularity_time_spines:
            issues.append(
                ValidationWarning(
                    message="Only one time spine is supported per granularity. "
                    f"Got duplicates: {duplicate_granularity_time_spines}"
                )
            )

        # Warn if there is a time dimension configured with a smaller granularity than the smallest time spine
        # granularity
        dimension_granularities = {
            dimension.type_params.time_granularity
            for semantic_model in semantic_manifest.semantic_models
            for dimension in semantic_model.dimensions
            if dimension.type_params
        }
        # No dimension carries type params, so there is nothing to compare against; `min()` on an empty
        # set would raise ValueError.
        if not dimension_granularities:
            return issues

        # NOTE(review): `min()` and `<` rely on TimeGranularity members being orderable - confirm.
        smallest_dim_granularity = min(dimension_granularities)
        smallest_time_spine_granularity = min(time_spines_by_granularity)
        if smallest_dim_granularity < smallest_time_spine_granularity:
            issues.append(
                ValidationWarning(
                    message="To avoid unexpected query errors, configuring a time spine at or below the smallest time "
                    "dimension granularity is recommended. Smallest time dimension granularity: "
                    f"{smallest_dim_granularity.name}; "
                    f"Smallest time spine granularity: {smallest_time_spine_granularity.name}"
                )
            )

        return issues
211 changes: 211 additions & 0 deletions tests/validations/test_time_spines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
from copy import deepcopy

import pytest

from dbt_semantic_interfaces.implementations.elements.dimension import (
PydanticDimension,
PydanticDimensionTypeParams,
)
from dbt_semantic_interfaces.implementations.elements.entity import PydanticEntity
from dbt_semantic_interfaces.implementations.elements.measure import PydanticMeasure
from dbt_semantic_interfaces.implementations.filters.where_filter import (
PydanticWhereFilter,
PydanticWhereFilterIntersection,
)
from dbt_semantic_interfaces.implementations.time_spine import PydanticTimeSpine, PydanticTimeSpinePrimaryColumn
from dbt_semantic_interfaces.implementations.project_configuration import (
PydanticProjectConfiguration,
PydanticTimeSpineTableConfiguration,
)
from dbt_semantic_interfaces.implementations.node_relation import PydanticNodeRelation
from dbt_semantic_interfaces.implementations.metric import (
PydanticConstantPropertyInput,
PydanticConversionTypeParams,
PydanticCumulativeTypeParams,
PydanticMetricInput,
PydanticMetricInputMeasure,
PydanticMetricTimeWindow,
PydanticMetricTypeParams,
)
from dbt_semantic_interfaces.implementations.semantic_manifest import (
PydanticSemanticManifest,
)
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.test_utils import (
find_metric_with,
metric_with_guaranteed_meta,
semantic_model_with_guaranteed_meta,
)
from dbt_semantic_interfaces.type_enums import (
AggregationType,
DimensionType,
EntityType,
MetricType,
PeriodAggregation,
TimeGranularity,
)
from dbt_semantic_interfaces.validations.metrics import (
ConversionMetricRule,
CumulativeMetricRule,
DerivedMetricRule,
MetricTimeGranularityRule,
WhereFiltersAreParseable,
)
from dbt_semantic_interfaces.validations.semantic_manifest_validator import (
SemanticManifestValidator,
)
from dbt_semantic_interfaces.validations.validator_helpers import (
SemanticManifestValidationException,
)
from tests.example_project_configuration import EXAMPLE_PROJECT_CONFIGURATION


def test_valid_time_spines() -> None:  # noqa: D
    """Manifest with DAY and SECOND time spines and a SECOND time dimension has no blocking issues."""
    measure = PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
    time_dimension = PydanticDimension(
        name="dim",
        type=DimensionType.TIME,
        type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
    )
    semantic_model = semantic_model_with_guaranteed_meta(
        name="sum_measure",
        measures=[measure],
        dimensions=[time_dimension],
        entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
    )

    day_time_spine = PydanticTimeSpine(
        node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"),
        primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY),
    )
    second_time_spine = PydanticTimeSpine(
        node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"),
        primary_column=PydanticTimeSpinePrimaryColumn(name="ts", time_granularity=TimeGranularity.SECOND),
    )
    project_configuration = PydanticProjectConfiguration(
        time_spine_table_configurations=[],
        time_spines=[day_time_spine, second_time_spine],
    )

    semantic_manifest = PydanticSemanticManifest(
        semantic_models=[semantic_model],
        metrics=[],
        project_configuration=project_configuration,
    )

    # checked_validations raises if any blocking issue is found, so returning normally is the assertion.
    SemanticManifestValidator[PydanticSemanticManifest]().checked_validations(semantic_manifest)


def test_only_legacy_time_spine() -> None:  # noqa: D
    """Manifest with only a legacy time spine table configuration yields a single deprecation warning."""
    validator = SemanticManifestValidator[PydanticSemanticManifest]()

    measure = PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
    time_dimension = PydanticDimension(
        name="dim",
        type=DimensionType.TIME,
        type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
    )
    semantic_model = semantic_model_with_guaranteed_meta(
        name="sum_measure",
        measures=[measure],
        dimensions=[time_dimension],
        entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
    )

    # Only the legacy table configuration is supplied; `time_spines` is deliberately left unset.
    legacy_time_spine = PydanticTimeSpineTableConfiguration(
        location="hurrr", column_name="fun_col", grain=TimeGranularity.DAY
    )
    project_configuration = PydanticProjectConfiguration(time_spine_table_configurations=[legacy_time_spine])

    semantic_manifest = PydanticSemanticManifest(
        semantic_models=[semantic_model],
        metrics=[],
        project_configuration=project_configuration,
    )

    issues = validator.validate_semantic_manifest(semantic_manifest)

    assert not issues.has_blocking_issues
    assert len(issues.warnings) == 1
    expected_fragment = "Time spines without YAML configuration are in the process of deprecation."
    assert expected_fragment in issues.warnings[0].message


def test_duplicate_time_spine_granularity() -> None:  # noqa: D
    """Two time spines sharing the SECOND granularity yield a single duplicate-granularity warning."""
    validator = SemanticManifestValidator[PydanticSemanticManifest]()

    measure = PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
    time_dimension = PydanticDimension(
        name="dim",
        type=DimensionType.TIME,
        type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
    )
    semantic_model = semantic_model_with_guaranteed_meta(
        name="sum_measure",
        measures=[measure],
        dimensions=[time_dimension],
        entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
    )

    # Both time spines use the same granularity on purpose.
    first_time_spine = PydanticTimeSpine(
        node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"),
        primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.SECOND),
    )
    second_time_spine = PydanticTimeSpine(
        node_relation=PydanticNodeRelation(alias="time_spine2", schema_name="my_fav_schema"),
        primary_column=PydanticTimeSpinePrimaryColumn(name="ts", time_granularity=TimeGranularity.SECOND),
    )
    project_configuration = PydanticProjectConfiguration(
        time_spine_table_configurations=[],
        time_spines=[first_time_spine, second_time_spine],
    )

    semantic_manifest = PydanticSemanticManifest(
        semantic_models=[semantic_model],
        metrics=[],
        project_configuration=project_configuration,
    )

    issues = validator.validate_semantic_manifest(semantic_manifest)

    assert not issues.has_blocking_issues
    assert len(issues.warnings) == 1
    expected_fragment = "Only one time spine is supported per granularity."
    assert expected_fragment in issues.warnings[0].message


def test_dimension_granularity_smaller_than_time_spine() -> None:  # noqa: D
    """A SECOND time dimension with only a DAY time spine yields a single granularity warning."""
    validator = SemanticManifestValidator[PydanticSemanticManifest]()

    measure = PydanticMeasure(name="foo", agg=AggregationType.SUM, agg_time_dimension="dim", create_metric=True)
    time_dimension = PydanticDimension(
        name="dim",
        type=DimensionType.TIME,
        type_params=PydanticDimensionTypeParams(time_granularity=TimeGranularity.SECOND),
    )
    semantic_model = semantic_model_with_guaranteed_meta(
        name="sum_measure",
        measures=[measure],
        dimensions=[time_dimension],
        entities=[PydanticEntity(name="entity", type=EntityType.PRIMARY)],
    )

    # The only time spine is DAY, while the dimension above is SECOND.
    day_time_spine = PydanticTimeSpine(
        node_relation=PydanticNodeRelation(alias="time_spine", schema_name="my_fav_schema"),
        primary_column=PydanticTimeSpinePrimaryColumn(name="ds", time_granularity=TimeGranularity.DAY),
    )
    project_configuration = PydanticProjectConfiguration(
        time_spine_table_configurations=[],
        time_spines=[day_time_spine],
    )

    semantic_manifest = PydanticSemanticManifest(
        semantic_models=[semantic_model],
        metrics=[],
        project_configuration=project_configuration,
    )

    issues = validator.validate_semantic_manifest(semantic_manifest)

    assert not issues.has_blocking_issues
    assert len(issues.warnings) == 1
    expected_fragment = "configuring a time spine at or below the smallest time dimension granularity is recommended"
    assert expected_fragment in issues.warnings[0].message

0 comments on commit c82679c

Please sign in to comment.