Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve robustness of LinkableElementSet operations #1145

Merged
merged 3 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 55 additions & 2 deletions metricflow/model/semantics/linkable_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,42 @@
from enum import Enum
from typing import FrozenSet, Optional, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.protocols.dimension import DimensionType
from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from metricflow.specs.specs import EntityReference


class LinkableElementType(Enum):
    """Closed set of linkable element kinds that callers may encounter or request.

    Each member corresponds to a kind of queryable semantic manifest element (and, by extension, a kind of
    LinkableSpec): dimensions, time dimensions, entities, and metrics. Tagging elements with one of these
    values lets callers verify they received the kind of LinkableElement they expected - e.g., a Dimension
    rather than an Entity.
    """

    DIMENSION = "dimension"
    ENTITY = "entity"
    METRIC = "metric"
    TIME_DIMENSION = "time_dimension"

    @property
    def is_dimension_type(self) -> bool:
        """Return True for DIMENSION and TIME_DIMENSION, and False for ENTITY and METRIC."""
        # Bind to a local name so static type checkers can narrow the remaining members for the
        # exhaustiveness assertion at the end.
        element_type = self
        if element_type is LinkableElementType.ENTITY or element_type is LinkableElementType.METRIC:
            return False
        if element_type is LinkableElementType.DIMENSION or element_type is LinkableElementType.TIME_DIMENSION:
            return True
        # Only reached if a new member is added without being handled in one of the guards above.
        return assert_values_exhausted(element_type)


class LinkableElementProperty(Enum):
"""The properties associated with a valid linkable element.

Expand Down Expand Up @@ -55,10 +84,23 @@ class ElementPathKey:
"""A key that can uniquely identify an element and the joins used to realize the element."""

element_name: str
element_type: LinkableElementType
entity_links: Tuple[EntityReference, ...]
time_granularity: Optional[TimeGranularity] = None
date_part: Optional[DatePart] = None

def __post_init__(self) -> None:
    """Assert that all requirements associated with the element_type are met.

    Time dimension keys must carry a time granularity; the other element types have no extra requirements.

    Raises:
        AssertionError: If element_type is TIME_DIMENSION and time_granularity is not set.
    """
    # Use a local alias to allow type refinement for the static exhaustive switch assertion
    element_type = self.element_type
    if element_type is LinkableElementType.TIME_DIMENSION:
        assert (
            self.time_granularity
        ), "Time granularity must be specified for all ElementPathKeys associated with time dimensions!"
    elif (
        # Each member must be compared against element_type explicitly. A bare enum member used as an
        # `or` operand is always truthy, which would make this branch match every value and leave the
        # exhaustiveness check below unreachable.
        element_type is LinkableElementType.DIMENSION
        or element_type is LinkableElementType.ENTITY
        or element_type is LinkableElementType.METRIC
    ):
        pass
    else:
        # Any element type not handled above is handed to the exhaustiveness assertion.
        assert_values_exhausted(element_type)


@dataclass(frozen=True)
class SemanticModelJoinPathElement:
Expand All @@ -75,6 +117,7 @@ class LinkableDimension:
# The semantic model where this dimension was defined.
semantic_model_origin: Optional[SemanticModelReference]
element_name: str
dimension_type: DimensionType
entity_links: Tuple[EntityReference, ...]
join_path: Tuple[SemanticModelJoinPathElement, ...]
properties: FrozenSet[LinkableElementProperty]
Expand All @@ -83,8 +126,14 @@ class LinkableDimension:

@property
def path_key(self) -> ElementPathKey: # noqa: D102
if self.dimension_type is DimensionType.CATEGORICAL:
element_type = LinkableElementType.DIMENSION
else:
element_type = LinkableElementType.TIME_DIMENSION

return ElementPathKey(
element_name=self.element_name,
element_type=element_type,
entity_links=self.entity_links,
time_granularity=self.time_granularity,
date_part=self.date_part,
Expand All @@ -108,7 +157,9 @@ class LinkableEntity:

@property
def path_key(self) -> ElementPathKey: # noqa: D102
return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links)
return ElementPathKey(
element_name=self.element_name, element_type=LinkableElementType.ENTITY, entity_links=self.entity_links
)

@property
def reference(self) -> EntityReference: # noqa: D102
Expand All @@ -128,7 +179,9 @@ class LinkableMetric:

@property
def path_key(self) -> ElementPathKey: # noqa: D102
return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links)
return ElementPathKey(
element_name=self.element_name, element_type=LinkableElementType.METRIC, entity_links=self.entity_links
)

@property
def reference(self) -> MetricReference: # noqa: D102
Expand Down
27 changes: 12 additions & 15 deletions metricflow/model/semantics/linkable_element_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
ElementPathKey,
LinkableDimension,
LinkableElementProperty,
LinkableElementType,
LinkableEntity,
LinkableMetric,
)
Expand All @@ -21,17 +22,6 @@ class LinkableElementSet:
TODO: There are similarities with LinkableSpecSet - consider consolidation.
"""

# Dictionaries that map the path key to context on the dimension
#
# For example:
# {
# "listing__country_latest": (
# LinkableDimension(
# element_name="country_latest",
# entity_links=("listing",),
# semantic_model_origin="listings_latest_source",
# )
# }
path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = field(default_factory=dict)
path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = field(default_factory=dict)
path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = field(default_factory=dict)
Expand All @@ -40,7 +30,10 @@ class LinkableElementSet:
def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet:
"""Combine multiple sets together by the path key.

If there are elements with the same join key, those elements will be categorized as ambiguous.
If there are elements with the same join key and different element(s) in the tuple of values,
those elements will be categorized as ambiguous.
Note this does not deduplicate values, so there may be unambiguous merged sets that appear to have
multiple values if all one does is a simple length check.
"""
key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list)
key_to_linkable_entities: Dict[ElementPathKey, List[LinkableEntity]] = defaultdict(list)
Expand All @@ -54,7 +47,6 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li
for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items():
key_to_linkable_metrics[path_key].extend(linkable_metrics)

# Convert the dictionaries to use tuples instead of lists.
return LinkableElementSet(
path_key_to_linkable_dimensions={
path_key: tuple(dimensions) for path_key, dimensions in key_to_linkable_dimensions.items()
Expand All @@ -71,6 +63,11 @@ def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> Li
def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet:
"""Find the intersection of all elements in the sets by path key.

This will return the intersection of all path keys defined in the sets, but the union of elements associated
with each path key. In other words, it filters out path keys (i.e., linkable specs) that are not referenced
in every set in the input sequence, but it preserves all of the various potentially ambiguous LinkableElement
instances associated with the path keys that remain.

This is useful to figure out the common dimensions that are possible to query with multiple metrics. You would
find the LinkableSpecSet for each metric in the query, then do an intersection of the sets.
"""
Expand Down Expand Up @@ -210,7 +207,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102
entity_links=path_key.entity_links,
)
for path_key in self.path_key_to_linkable_dimensions.keys()
if not path_key.time_granularity
if path_key.element_type is LinkableElementType.DIMENSION
),
time_dimension_specs=tuple(
TimeDimensionSpec(
Expand All @@ -220,7 +217,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102
date_part=path_key.date_part,
)
for path_key in self.path_key_to_linkable_dimensions.keys()
if path_key.time_granularity
if path_key.element_type is LinkableElementType.TIME_DIMENSION and path_key.time_granularity
),
entity_specs=tuple(
EntitySpec(
Expand Down
13 changes: 12 additions & 1 deletion metricflow/model/semantics/linkable_spec_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
ElementPathKey,
LinkableDimension,
LinkableElementProperty,
LinkableElementType,
LinkableEntity,
LinkableMetric,
SemanticModelJoinPath,
Expand Down Expand Up @@ -69,6 +70,7 @@ def _generate_linkable_time_dimensions(
LinkableDimension(
semantic_model_origin=semantic_model_origin,
element_name=dimension.reference.element_name,
dimension_type=DimensionType.TIME,
entity_links=entity_links,
join_path=tuple(join_path),
time_granularity=time_granularity,
Expand All @@ -84,6 +86,7 @@ def _generate_linkable_time_dimensions(
LinkableDimension(
semantic_model_origin=semantic_model_origin,
element_name=dimension.reference.element_name,
dimension_type=DimensionType.TIME,
entity_links=entity_links,
join_path=tuple(join_path),
time_granularity=time_granularity,
Expand Down Expand Up @@ -193,7 +196,11 @@ def __init__(
for entity in semantic_model.entities:
linkable_metrics_set = LinkableElementSet(
path_key_to_linkable_metrics={
ElementPathKey(element_name=metric.element_name, entity_links=(entity.reference,)): (
ElementPathKey(
element_name=metric.element_name,
element_type=LinkableElementType.METRIC,
entity_links=(entity.reference,),
): (
LinkableMetric(
element_name=metric.element_name,
entity_links=(entity.reference,),
Expand Down Expand Up @@ -293,6 +300,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link
LinkableDimension(
semantic_model_origin=semantic_model.reference,
element_name=dimension.reference.element_name,
dimension_type=DimensionType.CATEGORICAL,
entity_links=(entity_link,),
join_path=(),
properties=dimension_properties,
Expand Down Expand Up @@ -405,6 +413,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
for date_part in possible_date_parts:
path_key = ElementPathKey(
element_name=DataSet.metric_time_dimension_name(),
element_type=LinkableElementType.TIME_DIMENSION,
entity_links=(),
time_granularity=time_granularity,
date_part=date_part,
Expand All @@ -413,6 +422,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference
LinkableDimension(
semantic_model_origin=measure_semantic_model.reference if measure_semantic_model else None,
element_name=DataSet.metric_time_dimension_name(),
dimension_type=DimensionType.TIME,
entity_links=(),
join_path=(),
# Anything that's not at the base time granularity of the measure's aggregation time dimension
Expand Down Expand Up @@ -642,6 +652,7 @@ def create_linkable_element_set_from_join_path(
LinkableDimension(
semantic_model_origin=semantic_model.reference,
element_name=dimension.reference.element_name,
dimension_type=DimensionType.CATEGORICAL,
entity_links=entity_links,
join_path=join_path.path_elements,
properties=with_properties,
Expand Down
Loading
Loading