Skip to content

Commit

Permalink
fixup! Add validation rule for SCD query with no time
Browse files Browse the repository at this point in the history
  • Loading branch information
serramatutu committed Oct 10, 2024
1 parent 3d5170d commit 53eae93
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 91 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ def __init__(

logger.debug(LazyFormat(lambda: f"Building valid group-by-item indexes took: {time.time() - start_time:.2f}s"))

def _semantic_model_is_scd(self, semantic_model: SemanticModel) -> bool:
"""Whether the semantic model is an SCD."""
def _is_semantic_model_scd(self, semantic_model: SemanticModel) -> bool:
"""Whether the semantic model's underlying table is an SCD."""
return any(dim.validity_params is not None for dim in semantic_model.dimensions)

def _generate_linkable_time_dimensions(
Expand Down Expand Up @@ -293,7 +293,7 @@ def get_joinable_metrics_for_semantic_model(
necessary.
"""
properties = frozenset({LinkableElementProperty.METRIC, LinkableElementProperty.JOINED})
if self._semantic_model_is_scd(semantic_model):
if self._is_semantic_model_scd(semantic_model):
properties = properties.union({LinkableElementProperty.SCD_HOP})

join_path_has_path_links = len(using_join_path.path_elements) > 0
Expand Down Expand Up @@ -332,7 +332,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link
Elements related to metric_time are handled separately in _get_metric_time_elements().
Linkable metrics are not considered local to the semantic model since they always require a join.
"""
semantic_model_is_scd = self._semantic_model_is_scd(semantic_model)
semantic_model_is_scd = self._is_semantic_model_scd(semantic_model)

linkable_dimensions = []
linkable_entities = []
Expand Down Expand Up @@ -475,7 +475,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
defined_granularity: Optional[ExpandedTimeGranularity] = None
if measure_reference:
measure_semantic_model = self._get_semantic_model_for_measure(measure_reference)
semantic_model_is_scd = self._semantic_model_is_scd(measure_semantic_model)
semantic_model_is_scd = self._is_semantic_model_scd(measure_semantic_model)
measure_agg_time_dimension_reference = measure_semantic_model.checked_agg_time_dimension_for_measure(
measure_reference=measure_reference
)
Expand Down Expand Up @@ -732,7 +732,9 @@ def create_linkable_element_set_from_join_path(
) -> LinkableElementSet:
"""Given the current path, generate the respective linkable elements from the last semantic model in the path."""
semantic_model = self._semantic_model_lookup.get_by_reference(join_path.last_semantic_model_reference)
assert semantic_model
assert (
semantic_model
), f"Semantic model {join_path.last_semantic_model_reference.semantic_model_name} is in join path but does not exist in SemanticModelLookup"

properties = frozenset({LinkableElementProperty.JOINED})
if len(join_path.path_elements) > 1:
Expand All @@ -741,9 +743,11 @@ def create_linkable_element_set_from_join_path(
# If any of the semantic models in the join path is an SCD, add SCD_HOP
for reference_to_derived_model in join_path.derived_from_semantic_models:
derived_model = self._semantic_model_lookup.get_by_reference(reference_to_derived_model)
assert derived_model
assert (
derived_model
), f"Semantic model {reference_to_derived_model.semantic_model_name} is in join path but does not exist in SemanticModelLookup"

if self._semantic_model_is_scd(derived_model):
if self._is_semantic_model_scd(derived_model):
properties = properties.union({LinkableElementProperty.SCD_HOP})
break

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


@dataclass(frozen=True)
class SCDRequiresMetricTimeIssue(MetricFlowQueryResolutionIssue):
class ScdRequiresMetricTimeIssue(MetricFlowQueryResolutionIssue):
"""Describes an issue with a query that includes a SCD group by but does not include metric_time."""

scd_qualified_names: Sequence[str]
Expand All @@ -31,8 +31,8 @@ def ui_description(self, associated_input: MetricFlowQueryResolverInput) -> str:
)

@override
def with_path_prefix(self, path_prefix: MetricFlowQueryResolutionPath) -> SCDRequiresMetricTimeIssue:
return SCDRequiresMetricTimeIssue(
def with_path_prefix(self, path_prefix: MetricFlowQueryResolutionPath) -> ScdRequiresMetricTimeIssue:
return ScdRequiresMetricTimeIssue(
issue_type=self.issue_type,
parent_issues=self.parent_issues,
query_resolution_path=self.query_resolution_path.with_path_prefix(path_prefix),
Expand All @@ -42,8 +42,8 @@ def with_path_prefix(self, path_prefix: MetricFlowQueryResolutionPath) -> SCDReq
@staticmethod
def from_parameters( # noqa: D102
scd_qualified_names: Sequence[str], query_resolution_path: MetricFlowQueryResolutionPath
) -> SCDRequiresMetricTimeIssue:
return SCDRequiresMetricTimeIssue(
) -> ScdRequiresMetricTimeIssue:
return ScdRequiresMetricTimeIssue(
issue_type=MetricFlowQueryIssueType.ERROR,
parent_issues=(),
query_resolution_path=query_resolution_path,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from collections.abc import Sequence
from typing import Sequence

from dbt_semantic_interfaces.protocols import WhereFilterIntersection
from dbt_semantic_interfaces.references import MetricReference
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from __future__ import annotations

from collections.abc import Sequence
from dataclasses import dataclass
from typing import List
from typing import List, Sequence

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME
Expand All @@ -24,7 +23,7 @@
OffsetMetricRequiresMetricTimeIssue,
)
from metricflow_semantics.query.issues.parsing.scd_requires_metric_time import (
SCDRequiresMetricTimeIssue,
ScdRequiresMetricTimeIssue,
)
from metricflow_semantics.query.resolver_inputs.query_resolver_inputs import (
ResolverInputForQuery,
Expand All @@ -48,7 +47,7 @@ class MetricTimeQueryValidationRule(PostResolutionQueryValidationRule):
Currently, known cases are:
* Cumulative metrics.
* Derived metrics with an offset time.g
* Derived metrics with an offset time.
* Slowly changing dimensions
"""

Expand Down Expand Up @@ -83,8 +82,8 @@ def _get_query_items_analysis(
if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs):
has_agg_time_dimension = True

matches = group_by_item_input.spec_pattern.match(scd_specs)
scds.extend(match.qualified_name for match in matches)
scd_matches = group_by_item_input.spec_pattern.match(scd_specs)
scds.extend(match.qualified_name for match in scd_matches)

return QueryItemsAnalysis(
scds=scds,
Expand All @@ -109,7 +108,7 @@ def validate_metric_in_resolution_dag(
# only check for metric_time. If we decide to support agg_time_dimension, we should add a check
if len(query_items_analysis.scds) > 0 and not query_items_analysis.has_metric_time:
issues = issues.add_issue(
SCDRequiresMetricTimeIssue.from_parameters(
ScdRequiresMetricTimeIssue.from_parameters(
scd_qualified_names=query_items_analysis.scds,
query_resolution_path=resolution_path,
)
Expand Down Expand Up @@ -161,6 +160,3 @@ def validate_query_in_resolution_dag(
resolution_path: MetricFlowQueryResolutionPath,
) -> MetricFlowQueryResolutionIssueSet:
return MetricFlowQueryResolutionIssueSet.empty_instance()


__all__ = ["MetricTimeQueryValidationRule"]
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_create_linkable_element_set_from_join_path_multi_hop( # noqa: D103
left_semantic_model_reference=SemanticModelReference("views_source"),
path_elements=(
SemanticModelJoinPathElement(
semantic_model_reference=SemanticModelReference("bookings"),
semantic_model_reference=SemanticModelReference("bookings_source"),
join_on_entity=EntityReference("guest"),
),
SemanticModelJoinPathElement(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from __future__ import annotations

import logging
import os
import textwrap
from typing import List

import pytest
from _pytest.fixtures import FixtureRequest
from dbt_semantic_interfaces.implementations.semantic_manifest import PydanticSemanticManifest
from dbt_semantic_interfaces.parsing.dir_to_model import parse_yaml_files_to_validation_ready_semantic_manifest
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.protocols import SemanticManifest
Expand All @@ -30,9 +30,6 @@
EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE,
)
from metricflow_semantics.test_helpers.metric_time_dimension import MTD
from metricflow_semantics.test_helpers.semantic_manifest_yamls.scd_manifest import (
SCD_MANIFEST_ANCHOR,
)
from metricflow_semantics.test_helpers.snapshot_helpers import assert_object_snapshot_equal

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -196,20 +193,9 @@ def revenue_query_parser() -> MetricFlowQueryParser: # noqa


@pytest.fixture
def scd_query_parser() -> MetricFlowQueryParser: # noqa
file_paths = [
os.path.join(SCD_MANIFEST_ANCHOR.directory, f)
for f in os.listdir(SCD_MANIFEST_ANCHOR.directory)
if f.endswith(".yaml")
]

contents: List[str] = []
for fp in file_paths:
with open(fp, "r") as f:
contents.append(f.read())

files = [YamlConfigFile(filepath="inline_for_test_1", contents=c) for c in contents]
return query_parser_from_yaml(files)
def scd_query_parser(scd_semantic_manifest: PydanticSemanticManifest) -> MetricFlowQueryParser: # noqa
semantic_manifest_lookup = SemanticManifestLookup(scd_semantic_manifest)
return MetricFlowQueryParser(semantic_manifest_lookup)


def test_query_parser( # noqa: D103
Expand Down
Loading

0 comments on commit 53eae93

Please sign in to comment.