Add a Resolver for Saved-Query Dependencies #1152

Merged · 14 commits · Apr 29, 2024

Changes from 1 commit
metricflow_semantics/model/semantic_model_derivation.py (new file)
@@ -0,0 +1,19 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Sequence

from dbt_semantic_interfaces.references import SemanticModelReference


class SemanticModelDerivation(ABC):
"""Interface for an object that can be described as derived from a semantic model."""

@property
@abstractmethod
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
"""The semantic models that this was derived from.
The returned sequence should be ordered and not contain duplicates.
"""
raise NotImplementedError
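
As a reading aid (not part of the diff): a minimal sketch of a class implementing this new interface. The class name is hypothetical; the import path follows the new module above, and the dedupe-and-sort mirrors the docstring's contract that the result be ordered and duplicate-free.

from __future__ import annotations

from typing import Sequence

from dbt_semantic_interfaces.references import SemanticModelReference

from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation


class JoinedElementExample(SemanticModelDerivation):
    """Hypothetical implementer, shown only to illustrate the interface contract."""

    def __init__(self, references: Sequence[SemanticModelReference]) -> None:
        # Deduplicate, then order deterministically by semantic model name.
        self._references = tuple(sorted(set(references), key=lambda r: r.semantic_model_name))

    @property
    def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
        return self._references
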
metricflow_semantics/model/semantics/linkable_element.py
@@ -1,17 +1,27 @@
from __future__ import annotations

import logging
from abc import ABC
from dataclasses import dataclass
from enum import Enum
from typing import FrozenSet, Optional, Tuple
from typing import FrozenSet, Optional, Sequence, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.protocols.dimension import DimensionType
from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
MetricReference,
SemanticModelReference,
)
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from typing_extensions import override

from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.specs.spec_classes import EntityReference
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation

logger = logging.getLogger(__name__)


class LinkableElementType(Enum):
@@ -72,8 +82,14 @@ class SemanticModelJoinPathElement:
join_on_entity: EntityReference


class LinkableElement(SemanticModelDerivation, ABC):
"""An entity / dimension that may have been joined by entities."""

pass


@dataclass(frozen=True)
class LinkableDimension:
class LinkableDimension(LinkableElement):
"""Describes how a dimension can be realized by joining based on entity links."""

# The semantic model where this dimension was defined.
@@ -105,9 +121,20 @@ def path_key(self) -> ElementPathKey: # noqa: D102
def reference(self) -> DimensionReference: # noqa: D102
return DimensionReference(element_name=self.element_name)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
semantic_model_references = set()
if self.semantic_model_origin:
semantic_model_references.add(self.semantic_model_origin)
for join_path_item in self.join_path:
semantic_model_references.add(join_path_item.semantic_model_reference)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)
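
An editorial aside: this dedupe-then-sort pattern repeats for `LinkableEntity` and `LinkableMetric` below. A small standalone sketch (model names made up) of the ordering guarantee:

from dbt_semantic_interfaces.references import SemanticModelReference

references = {
    SemanticModelReference(semantic_model_name="listings"),
    SemanticModelReference(semantic_model_name="bookings"),
    SemanticModelReference(semantic_model_name="bookings"),  # duplicate collapses in the set
}
ordered = sorted(references, key=lambda reference: reference.semantic_model_name)
assert [r.semantic_model_name for r in ordered] == ["bookings", "listings"]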


@dataclass(frozen=True)
class LinkableEntity:
class LinkableEntity(LinkableElement, SemanticModelDerivation):
Contributor: You don't need both of these, right, just LinkableElement?

Author: Right - updated.

"""Describes how an entity can be realized by joining based on entity links."""

# The semantic model where this entity was defined.
@@ -127,9 +154,18 @@ def path_key(self) -> ElementPathKey: # noqa: D102
def reference(self) -> EntityReference: # noqa: D102
return EntityReference(element_name=self.element_name)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
semantic_model_references = {self.semantic_model_origin}
for join_path_item in self.join_path:
semantic_model_references.add(join_path_item.semantic_model_reference)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)


@dataclass(frozen=True)
class LinkableMetric:
class LinkableMetric(LinkableElement, SemanticModelDerivation):
Contributor: Same thing here, just LinkableElement?

Author: Updated.

"""Describes how a metric can be realized by joining based on entity links."""

element_name: str
@@ -149,6 +185,15 @@ def path_key(self) -> ElementPathKey: # noqa: D102
def reference(self) -> MetricReference: # noqa: D102
return MetricReference(element_name=self.element_name)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
semantic_model_references = {self.join_by_semantic_model}
for join_path_item in self.join_path:
semantic_model_references.add(join_path_item.semantic_model_reference)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)


@dataclass(frozen=True)
class SemanticModelJoinPath:
metricflow_semantics/model/semantics/linkable_element_set.py
@@ -4,25 +4,32 @@
from dataclasses import dataclass, field
from typing import Dict, FrozenSet, List, Sequence, Set, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.references import SemanticModelReference
from typing_extensions import override

from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
from metricflow_semantics.model.semantics.linkable_element import (
ElementPathKey,
LinkableDimension,
LinkableElementType,
LinkableEntity,
LinkableMetric,
)
from metricflow_semantics.specs.group_by_metric_spec import GroupByMetricSpec
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
DimensionSpec,
EntitySpec,
LinkableSpecSet,
GroupByMetricSpec,
InstanceSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
)


@dataclass(frozen=True)
class LinkableElementSet:
class LinkableElementSet(SemanticModelDerivation):
"""Container class for storing all linkable elements for a metric.

TODO: There are similarities with LinkableSpecSet - consider consolidation.
@@ -61,6 +68,33 @@ def __post_init__(self) -> None:
f"type! Mismatched elements: {mismatched_elements}"
)

# There shouldn't be a path key without any concrete items. Can be an issue as specs contained in this set are
# generated from the path keys.
for key, value in (
tuple(self.path_key_to_linkable_dimensions.items())
+ tuple(self.path_key_to_linkable_entities.items())
+ tuple(self.path_key_to_linkable_metrics.items())
):
assert len(value) > 0, f"{key} is empty"

# There shouldn't be any duplicate specs.
specs = self.specs
deduped_specs = set(specs)
assert len(deduped_specs) == len(specs)
assert len(deduped_specs) == (
len(self.path_key_to_linkable_dimensions)
+ len(self.path_key_to_linkable_entities)
+ len(self.path_key_to_linkable_metrics)
)

# Check time dimensions have the grain set.
for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items():
if path_key.element_type is LinkableElementType.TIME_DIMENSION:
for linkable_dimension in linkable_dimensions:
assert (
linkable_dimension.time_granularity is not None
), f"{path_key} has a dimension without the time granularity set: {linkable_dimension}"

@staticmethod
def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet:
"""Combine multiple sets together by the path key.
@@ -108,6 +142,8 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]
"""
if len(linkable_element_sets) == 0:
return LinkableElementSet()
elif len(linkable_element_sets) == 1:
return linkable_element_sets[0]

# Find path keys that are common to all LinkableElementSets.
dimension_path_keys: List[Set[ElementPathKey]] = []
@@ -120,7 +156,6 @@ def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]
common_linkable_dimension_path_keys = set.intersection(*dimension_path_keys) if dimension_path_keys else set()
common_linkable_entity_path_keys = set.intersection(*entity_path_keys) if entity_path_keys else set()
common_linkable_metric_path_keys = set.intersection(*metric_path_keys) if metric_path_keys else set()

# Create a new LinkableElementSet that only includes items where the path key is common to all sets.
join_path_to_linkable_dimensions: Dict[ElementPathKey, Set[LinkableDimension]] = defaultdict(set)
join_path_to_linkable_entities: Dict[ElementPathKey, Set[LinkableEntity]] = defaultdict(set)
@@ -233,43 +268,6 @@ def filter(
path_key_to_linkable_metrics=key_to_linkable_metrics,
)

@property
def as_spec_set(self) -> LinkableSpecSet: # noqa: D102
return LinkableSpecSet(
dimension_specs=tuple(
DimensionSpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
)
for path_key in self.path_key_to_linkable_dimensions.keys()
if path_key.element_type is LinkableElementType.DIMENSION
),
time_dimension_specs=tuple(
TimeDimensionSpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
time_granularity=path_key.time_granularity,
date_part=path_key.date_part,
)
for path_key in self.path_key_to_linkable_dimensions.keys()
if path_key.element_type is LinkableElementType.TIME_DIMENSION and path_key.time_granularity
),
entity_specs=tuple(
EntitySpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
)
for path_key in self.path_key_to_linkable_entities
),
group_by_metric_specs=tuple(
GroupByMetricSpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
)
for path_key in self.path_key_to_linkable_metrics
),
)

@property
def only_unique_path_keys(self) -> LinkableElementSet:
"""Returns a set that only includes path keys that map to a single distinct element."""
@@ -290,3 +288,104 @@ def only_unique_path_keys(self) -> LinkableElementSet:
if len(set(linkable_metrics)) <= 1
},
)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
Contributor: Could we add some test cases for this and the filter by spec pattern operations? They're quite a bit more complex than the direct to spec conversion.

Author: Added some test cases.

semantic_model_references: Set[SemanticModelReference] = set()
for linkable_dimensions in self.path_key_to_linkable_dimensions.values():
for linkable_dimension in linkable_dimensions:
semantic_model_references.update(linkable_dimension.derived_from_semantic_models)
for linkable_entities in self.path_key_to_linkable_entities.values():
for linkable_entity in linkable_entities:
semantic_model_references.update(linkable_entity.derived_from_semantic_models)
for linkable_metrics in self.path_key_to_linkable_metrics.values():
for linkable_metric in linkable_metrics:
semantic_model_references.update(linkable_metric.derived_from_semantic_models)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)

@property
def spec_count(self) -> int:
"""If this is mapped to spec objects, the number of specs that would be produced."""
return (
len(self.path_key_to_linkable_dimensions.keys())
+ len(self.path_key_to_linkable_entities.keys())
+ len(self.path_key_to_linkable_metrics.keys())
)

@property
def specs(self) -> Sequence[LinkableInstanceSpec]:
"""Converts the items in a `LinkableElementSet` to their corresponding spec objects."""
specs: List[LinkableInstanceSpec] = []

for path_key in (
tuple(self.path_key_to_linkable_dimensions.keys())
+ tuple(self.path_key_to_linkable_entities.keys())
+ tuple(self.path_key_to_linkable_metrics.keys())
):
specs.append(self._path_key_to_spec(path_key))

return specs

def _path_key_to_spec(self, path_key: ElementPathKey) -> LinkableInstanceSpec:
if path_key.element_type is LinkableElementType.DIMENSION:
return DimensionSpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
)
elif path_key.element_type is LinkableElementType.TIME_DIMENSION:
assert path_key.time_granularity is not None
Contributor: nit: error message, maybe something like "type refinement, should have been checked in dataclass validation"

Author: Updated.

return TimeDimensionSpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
time_granularity=path_key.time_granularity,
date_part=path_key.date_part,
)
elif path_key.element_type is LinkableElementType.ENTITY:
return EntitySpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
)
elif path_key.element_type is LinkableElementType.METRIC:
return GroupByMetricSpec(
element_name=path_key.element_name,
entity_links=path_key.entity_links,
)
else:
assert_values_exhausted(path_key.element_type)
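
The `else` branch relies on the exhaustive-match idiom: `assert_values_exhausted` (from `dbt_semantic_interfaces.enum_extension`) accepts only `NoReturn`, so if a new `LinkableElementType` member is added without a branch above, the type checker flags this call. A generic sketch of the idiom (simplified stand-ins, not the library's exact code):

from enum import Enum
from typing import NoReturn


def values_exhausted(value: NoReturn) -> NoReturn:
    """Hypothetical stand-in for assert_values_exhausted."""
    raise AssertionError(f"Should be unreachable, got: {value}")


class Shape(Enum):
    CIRCLE = "circle"
    SQUARE = "square"


def corner_count(shape: Shape) -> int:
    if shape is Shape.CIRCLE:
        return 0
    elif shape is Shape.SQUARE:
        return 4
    else:
        # If a new Shape member were added without a branch above, `shape`
        # would not narrow to Never here and the type checker would error.
        values_exhausted(shape)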

def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> LinkableElementSet:
"""Filter the elements in the set by the given spec patters.

Returns a new set consisting of the elements in the `LinkableElementSet` that have a corresponding spec that
match all the given spec patterns.
"""
# Some spec patterns (e.g. `BaseTimeGrainPattern`) need to see all candidate specs at once to match properly.
matching_specs: Sequence[InstanceSpec] = self.specs

for spec_pattern in spec_patterns:
matching_specs = spec_pattern.match(matching_specs)
specs_to_include = set(matching_specs)

path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {}
path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {}
path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {}

for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items():
if self._path_key_to_spec(path_key) in specs_to_include:
path_key_to_linkable_dimensions[path_key] = linkable_dimensions

for path_key, linkable_entities in self.path_key_to_linkable_entities.items():
if self._path_key_to_spec(path_key) in specs_to_include:
path_key_to_linkable_entities[path_key] = linkable_entities

for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items():
if self._path_key_to_spec(path_key) in specs_to_include:
path_key_to_linkable_metrics[path_key] = linkable_metrics

return LinkableElementSet(
path_key_to_linkable_dimensions=path_key_to_linkable_dimensions,
path_key_to_linkable_entities=path_key_to_linkable_entities,
path_key_to_linkable_metrics=path_key_to_linkable_metrics,
)
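
Closing note on the filter above: the spec list is folded through each pattern in turn, so later patterns only see the survivors of earlier ones. A toy illustration of that chaining with a hypothetical pattern class (real implementations live behind `SpecPattern.match`):

from typing import List, Sequence


class PrefixPattern:
    """Hypothetical stand-in for a SpecPattern that matches by name prefix."""

    def __init__(self, prefix: str) -> None:
        self._prefix = prefix

    def match(self, candidates: Sequence[str]) -> List[str]:
        return [candidate for candidate in candidates if candidate.startswith(self._prefix)]


candidates: Sequence[str] = ("booking__ds", "booking__is_instant", "listing__country")
for pattern in (PrefixPattern("booking"), PrefixPattern("booking__is")):
    candidates = pattern.match(candidates)

assert list(candidates) == ["booking__is_instant"]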