From b1f728be5a775cea2c2bcc0108d8df8ddec0bc10 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 18 Apr 2024 22:48:34 -0700 Subject: [PATCH] Add LinkableElementType annotation to ElementPathKey Currently the ElementPathKey is agnostic to the type of element it represents. In some cases this will be fairly obvious - if the time granularity is set it is a time dimension. In others we can determine this by inspection - if the name and links resolve to a distinct dimension in a semantic model then it must be a dimension, and similarly for an entity. However, there is the possibility of some ambiguity in rare cases where an entity and dimension might end up with the same identifier/link combinations. This doesn't matter in the current usage of the ElementPathKey, which is always bound to an element subtype, but when we add support for predicate pushdown we'll need to be able to use this key to fetch the specific set of LinkableElement classes, and this means we must be able to easily get a deterministic type. This commit adds the element type annotation we require, and does some additional related work to ensure that time dimensions and dimensions are always classified correctly when the LinkableDimension instance is created. 
--- .../model/semantics/linkable_element.py | 57 ++++++++++++++++++- .../model/semantics/linkable_element_set.py | 5 +- .../model/semantics/linkable_spec_resolver.py | 13 ++++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/metricflow/model/semantics/linkable_element.py b/metricflow/model/semantics/linkable_element.py index 650748685e..9f95b0cdd9 100644 --- a/metricflow/model/semantics/linkable_element.py +++ b/metricflow/model/semantics/linkable_element.py @@ -4,6 +4,8 @@ from enum import Enum from typing import FrozenSet, Optional, Tuple +from dbt_semantic_interfaces.enum_extension import assert_values_exhausted +from dbt_semantic_interfaces.protocols.dimension import DimensionType from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference from dbt_semantic_interfaces.type_enums.date_part import DatePart from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity @@ -11,6 +13,33 @@ from metricflow.specs.specs import EntityReference +class LinkableElementType(Enum): + """Enumeration of the possible types of linkable element we are encountering or expecting. + + LinkableElements effectively map on to LinkableSpecs and queryable semantic manifest elements such + as Metrics, Dimensions, and Entities. This provides the full set of types we might encounter, and is + useful for ensuring that we are always getting the correct LinkableElement from a given part of the + codebase - e.g., to ensure we are not accidentally getting an Entity when we expect a Dimension. 
+ """ + + DIMENSION = "dimension" + ENTITY = "entity" + METRIC = "metric" + TIME_DIMENSION = "time_dimension" + + @property + def is_dimension_type(self) -> bool: + """Property to simplify scenarios where callers need to know whether or not this represents a dimension.""" + # Use a local alias to allow type refinement for the static exhaustive switch assertion + element_type = self + if element_type is LinkableElementType.DIMENSION or element_type is LinkableElementType.TIME_DIMENSION: + return True + elif element_type is LinkableElementType.ENTITY or element_type is LinkableElementType.METRIC: + return False + else: + return assert_values_exhausted(element_type) + + class LinkableElementProperty(Enum): """The properties associated with a valid linkable element. @@ -55,10 +84,23 @@ class ElementPathKey: """A key that can uniquely identify an element and the joins used to realize the element.""" element_name: str + element_type: LinkableElementType entity_links: Tuple[EntityReference, ...] time_granularity: Optional[TimeGranularity] = None date_part: Optional[DatePart] = None + def __post_init__(self) -> None: + """Asserts all requirements associated with the element_type are met.""" + element_type = self.element_type + if element_type is LinkableElementType.TIME_DIMENSION: + assert ( + self.time_granularity + ), "Time granularity must be specified for all ElementPathKeys associated with time dimensions!" + elif element_type is LinkableElementType.DIMENSION or element_type is LinkableElementType.ENTITY or element_type is LinkableElementType.METRIC: + pass + else: + assert_values_exhausted(element_type) + @dataclass(frozen=True) class SemanticModelJoinPathElement: @@ -75,6 +117,7 @@ class LinkableDimension: # The semantic model where this dimension was defined. semantic_model_origin: Optional[SemanticModelReference] element_name: str + dimension_type: DimensionType entity_links: Tuple[EntityReference, ...] join_path: Tuple[SemanticModelJoinPathElement, ...] 
properties: FrozenSet[LinkableElementProperty] @@ -83,8 +126,14 @@ class LinkableDimension: @property def path_key(self) -> ElementPathKey: # noqa: D102 + if self.dimension_type is DimensionType.CATEGORICAL: + element_type = LinkableElementType.DIMENSION + else: + element_type = LinkableElementType.TIME_DIMENSION + return ElementPathKey( element_name=self.element_name, + element_type=element_type, entity_links=self.entity_links, time_granularity=self.time_granularity, date_part=self.date_part, @@ -108,7 +157,9 @@ class LinkableEntity: @property def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) + return ElementPathKey( + element_name=self.element_name, element_type=LinkableElementType.ENTITY, entity_links=self.entity_links + ) @property def reference(self) -> EntityReference: # noqa: D102 @@ -128,7 +179,9 @@ class LinkableMetric: @property def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) + return ElementPathKey( + element_name=self.element_name, element_type=LinkableElementType.METRIC, entity_links=self.entity_links + ) @property def reference(self) -> MetricReference: # noqa: D102 diff --git a/metricflow/model/semantics/linkable_element_set.py b/metricflow/model/semantics/linkable_element_set.py index 444838a4d4..9d42fdb1d2 100644 --- a/metricflow/model/semantics/linkable_element_set.py +++ b/metricflow/model/semantics/linkable_element_set.py @@ -8,6 +8,7 @@ ElementPathKey, LinkableDimension, LinkableElementProperty, + LinkableElementType, LinkableEntity, LinkableMetric, ) @@ -198,7 +199,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 entity_links=path_key.entity_links, ) for path_key in self.path_key_to_linkable_dimensions.keys() - if not path_key.time_granularity + if path_key.element_type is LinkableElementType.DIMENSION ), time_dimension_specs=tuple( TimeDimensionSpec( @@ 
-208,7 +209,7 @@ def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 date_part=path_key.date_part, ) for path_key in self.path_key_to_linkable_dimensions.keys() - if path_key.time_granularity + if path_key.element_type is LinkableElementType.TIME_DIMENSION and path_key.time_granularity ), entity_specs=tuple( EntitySpec( diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index 767990fa8c..85429eb0d5 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -26,6 +26,7 @@ ElementPathKey, LinkableDimension, LinkableElementProperty, + LinkableElementType, LinkableEntity, LinkableMetric, SemanticModelJoinPath, @@ -69,6 +70,7 @@ def _generate_linkable_time_dimensions( LinkableDimension( semantic_model_origin=semantic_model_origin, element_name=dimension.reference.element_name, + dimension_type=DimensionType.TIME, entity_links=entity_links, join_path=tuple(join_path), time_granularity=time_granularity, @@ -84,6 +86,7 @@ def _generate_linkable_time_dimensions( LinkableDimension( semantic_model_origin=semantic_model_origin, element_name=dimension.reference.element_name, + dimension_type=DimensionType.TIME, entity_links=entity_links, join_path=tuple(join_path), time_granularity=time_granularity, @@ -193,7 +196,11 @@ def __init__( for entity in semantic_model.entities: linkable_metrics_set = LinkableElementSet( path_key_to_linkable_metrics={ - ElementPathKey(element_name=metric.element_name, entity_links=(entity.reference,)): ( + ElementPathKey( + element_name=metric.element_name, + element_type=LinkableElementType.METRIC, + entity_links=(entity.reference,), + ): ( LinkableMetric( element_name=metric.element_name, entity_links=(entity.reference,), @@ -293,6 +300,7 @@ def _get_elements_in_semantic_model(self, semantic_model: SemanticModel) -> Link LinkableDimension( semantic_model_origin=semantic_model.reference, 
element_name=dimension.reference.element_name, + dimension_type=DimensionType.CATEGORICAL, entity_links=(entity_link,), join_path=(), properties=dimension_properties, @@ -405,6 +413,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference for date_part in possible_date_parts: path_key = ElementPathKey( element_name=DataSet.metric_time_dimension_name(), + element_type=LinkableElementType.TIME_DIMENSION, entity_links=(), time_granularity=time_granularity, date_part=date_part, @@ -413,6 +422,7 @@ def _get_metric_time_elements(self, measure_reference: Optional[MeasureReference LinkableDimension( semantic_model_origin=measure_semantic_model.reference if measure_semantic_model else None, element_name=DataSet.metric_time_dimension_name(), + dimension_type=DimensionType.TIME, entity_links=(), join_path=(), # Anything that's not at the base time granularity of the measure's aggregation time dimension @@ -642,6 +652,7 @@ def create_linkable_element_set_from_join_path( LinkableDimension( semantic_model_origin=semantic_model.reference, element_name=dimension.reference.element_name, + dimension_type=DimensionType.CATEGORICAL, entity_links=entity_links, join_path=join_path.path_elements, properties=with_properties,