From 2e8ec89771bd9d6db7b9ca5d301d3b2518d4ece0 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 18 Apr 2024 12:31:57 -0700 Subject: [PATCH 1/3] Split LinkableElement classes into a separate module The linkable_spec_resolver module is quite unwieldy with all of these type-spec dataclasses inlined with the resolver itself. Historically, this made sense, because these classes were generally not used outside of contexts where we'd need the resolver itself. However, predicate pushdown will change this calculus, as the classes in the new linkable_elements module are the least objectionable way of propagating resolution-time metadata into the DataflowPlanBuilder. Splitting these out proactively helps us avoid inevitable circular dependencies in the runtime components when we start moving the metadata these classes represent across that boundary. --- metricflow/engine/metricflow_engine.py | 2 +- .../model/semantics/linkable_element.py | 128 ++++++++++++++++++ .../model/semantics/linkable_spec_resolver.py | 126 ++--------------- metricflow/model/semantics/metric_lookup.py | 2 +- .../model/semantics/semantic_model_lookup.py | 2 +- .../semantics/test_linkable_spec_resolver.py | 3 +- 6 files changed, 140 insertions(+), 123 deletions(-) create mode 100644 metricflow/model/semantics/linkable_element.py diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index e48e2f3b4b..0552666646 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -33,10 +33,10 @@ from metricflow.mf_logging.formatting import indent from metricflow.mf_logging.pretty_print import mf_pformat from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup +from metricflow.model.semantics.linkable_element import LinkableDimension from metricflow.model.semantics.linkable_element_properties import ( LinkableElementProperty, ) -from metricflow.model.semantics.linkable_spec_resolver import LinkableDimension from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup from metricflow.naming.linkable_spec_name import StructuredLinkableSpecName from metricflow.plan_conversion.column_resolver import DunderColumnAssociationResolver diff --git a/metricflow/model/semantics/linkable_element.py b/metricflow/model/semantics/linkable_element.py new file mode 100644 index 0000000000..c4bb093595 --- /dev/null +++ b/metricflow/model/semantics/linkable_element.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import FrozenSet, Optional, Tuple + +from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference +from dbt_semantic_interfaces.type_enums.date_part import DatePart +from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity + +from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty +from metricflow.specs.specs import EntityReference + + +@dataclass(frozen=True) +class ElementPathKey: + """A key that can uniquely identify an element and the joins used to realize the element.""" + + element_name: str + entity_links: Tuple[EntityReference, ...] + time_granularity: Optional[TimeGranularity] = None + date_part: Optional[DatePart] = None + + +@dataclass(frozen=True) +class SemanticModelJoinPathElement: + """Describes joining a semantic model by the given entity.""" + + semantic_model_reference: SemanticModelReference + join_on_entity: EntityReference + + +@dataclass(frozen=True) +class LinkableDimension: + """Describes how a dimension can be realized by joining based on entity links.""" + + # The semantic model where this dimension was defined. + semantic_model_origin: Optional[SemanticModelReference] + element_name: str + entity_links: Tuple[EntityReference, ...] + join_path: Tuple[SemanticModelJoinPathElement, ...] + properties: FrozenSet[LinkableElementProperty] + time_granularity: Optional[TimeGranularity] + date_part: Optional[DatePart] + + @property + def path_key(self) -> ElementPathKey: # noqa: D102 + return ElementPathKey( + element_name=self.element_name, + entity_links=self.entity_links, + time_granularity=self.time_granularity, + date_part=self.date_part, + ) + + @property + def reference(self) -> DimensionReference: # noqa: D102 + return DimensionReference(element_name=self.element_name) + + +@dataclass(frozen=True) +class LinkableEntity: + """Describes how an entity can be realized by joining based on entity links.""" + + # The semantic model where this entity was defined. + semantic_model_origin: SemanticModelReference + element_name: str + properties: FrozenSet[LinkableElementProperty] + entity_links: Tuple[EntityReference, ...] + join_path: Tuple[SemanticModelJoinPathElement, ...] + + @property + def path_key(self) -> ElementPathKey: # noqa: D102 + return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) + + @property + def reference(self) -> EntityReference: # noqa: D102 + return EntityReference(element_name=self.element_name) + + +@dataclass(frozen=True) +class LinkableMetric: + """Describes how a metric can be realized by joining based on entity links.""" + + element_name: str + join_by_semantic_model: SemanticModelReference + # TODO: Enable joining by dimension + entity_links: Tuple[EntityReference, ...] + properties: FrozenSet[LinkableElementProperty] + join_path: Tuple[SemanticModelJoinPathElement, ...] + + @property + def path_key(self) -> ElementPathKey: # noqa: D102 + return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) + + @property + def reference(self) -> MetricReference: # noqa: D102 + return MetricReference(element_name=self.element_name) + + +@dataclass(frozen=True) +class SemanticModelJoinPath: + """Describes a series of joins between the measure semantic model, and other semantic models by entity. + + For example: + + (measure_source JOIN dimension_source0 ON entity A) JOIN dimension_source1 ON entity B + + would be represented by 2 path elements [(semantic_model0, A), (dimension_source1, B)] + """ + + path_elements: Tuple[SemanticModelJoinPathElement, ...] + + @property + def last_path_element(self) -> SemanticModelJoinPathElement: # noqa: D102 + assert len(self.path_elements) > 0 + return self.path_elements[-1] + + @property + def last_semantic_model_reference(self) -> SemanticModelReference: + """The last semantic model that would be joined in this path.""" + return self.last_path_element.semantic_model_reference + + @property + def last_entity_link(self) -> EntityReference: # noqa: D102 + return self.last_path_element.join_on_entity + + @property + def entity_links(self) -> Tuple[EntityReference, ...]: # noqa: D102 + return tuple(path_element.join_on_entity for path_element in self.path_elements) diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index 387a8e7ace..a06abb1718 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -11,7 +11,6 @@ from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest from dbt_semantic_interfaces.protocols.semantic_model import SemanticModel from dbt_semantic_interfaces.references import ( - DimensionReference, MeasureReference, MetricReference, SemanticModelReference, @@ -24,6 +23,14 @@ from metricflow.dataset.dataset import DataSet from metricflow.errors.errors import UnknownMetricLinkingError from metricflow.mf_logging.pretty_print import mf_pformat +from metricflow.model.semantics.linkable_element import ( + ElementPathKey, + LinkableDimension, + LinkableEntity, + LinkableMetric, + SemanticModelJoinPath, + SemanticModelJoinPathElement, +) from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty from metricflow.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator from metricflow.specs.specs import ( @@ -43,83 +50,6 @@ logger = logging.getLogger(__name__) -@dataclass(frozen=True) -class ElementPathKey: - """A key that can uniquely identify an element and the joins used to realize the element.""" - - element_name: str - entity_links: Tuple[EntityReference, ...] - time_granularity: Optional[TimeGranularity] = None - date_part: Optional[DatePart] = None - - -@dataclass(frozen=True) -class LinkableDimension: - """Describes how a dimension can be realized by joining based on entity links.""" - - # The semantic model where this dimension was defined. - semantic_model_origin: Optional[SemanticModelReference] - element_name: str - entity_links: Tuple[EntityReference, ...] - join_path: Tuple[SemanticModelJoinPathElement, ...] - properties: FrozenSet[LinkableElementProperty] - time_granularity: Optional[TimeGranularity] - date_part: Optional[DatePart] - - @property - def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey( - element_name=self.element_name, - entity_links=self.entity_links, - time_granularity=self.time_granularity, - date_part=self.date_part, - ) - - @property - def reference(self) -> DimensionReference: # noqa: D102 - return DimensionReference(element_name=self.element_name) - - -@dataclass(frozen=True) -class LinkableEntity: - """Describes how an entity can be realized by joining based on entity links.""" - - # The semantic model where this entity was defined. - semantic_model_origin: SemanticModelReference - element_name: str - properties: FrozenSet[LinkableElementProperty] - entity_links: Tuple[EntityReference, ...] - join_path: Tuple[SemanticModelJoinPathElement, ...] - - @property - def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) - - @property - def reference(self) -> EntityReference: # noqa: D102 - return EntityReference(element_name=self.element_name) - - -@dataclass(frozen=True) -class LinkableMetric: - """Describes how a metric can be realized by joining based on entity links.""" - - element_name: str - join_by_semantic_model: SemanticModelReference - # TODO: Enable joining by dimension - entity_links: Tuple[EntityReference, ...] - properties: FrozenSet[LinkableElementProperty] - join_path: Tuple[SemanticModelJoinPathElement, ...] - - @property - def path_key(self) -> ElementPathKey: # noqa: D102 - return ElementPathKey(element_name=self.element_name, entity_links=self.entity_links) - - @property - def reference(self) -> MetricReference: # noqa: D102 - return MetricReference(element_name=self.element_name) - - @dataclass(frozen=True) class LinkableElementSet: """Container class for storing all linkable elements for a metric. @@ -366,14 +296,6 @@ def only_unique_path_keys(self) -> LinkableElementSet: ) -@dataclass(frozen=True) -class SemanticModelJoinPathElement: - """Describes joining a semantic model by the given entity.""" - - semantic_model_reference: SemanticModelReference - join_on_entity: EntityReference - - def _generate_linkable_time_dimensions( semantic_model_origin: SemanticModelReference, dimension: Dimension, @@ -424,38 +346,6 @@ def _generate_linkable_time_dimensions( return linkable_dimensions -@dataclass(frozen=True) -class SemanticModelJoinPath: - """Describes a series of joins between the measure semantic model, and other semantic models by entity. - - For example: - - (measure_source JOIN dimension_source0 ON entity A) JOIN dimension_source1 ON entity B - - would be represented by 2 path elements [(semantic_model0, A), (dimension_source1, B)] - """ - - path_elements: Tuple[SemanticModelJoinPathElement, ...] - - @property - def last_path_element(self) -> SemanticModelJoinPathElement: # noqa: D102 - assert len(self.path_elements) > 0 - return self.path_elements[-1] - - @property - def last_semantic_model_reference(self) -> SemanticModelReference: - """The last semantic model that would be joined in this path.""" - return self.last_path_element.semantic_model_reference - - @property - def last_entity_link(self) -> EntityReference: # noqa: D102 - return self.last_path_element.join_on_entity - - @property - def entity_links(self) -> Tuple[EntityReference, ...]: # noqa: D102 - return tuple(path_element.join_on_entity for path_element in self.path_elements) - - class ValidLinkableSpecResolver: """Figures out what linkable specs are valid for a given metric. diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index 9ac04d764e..a49f199e58 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -9,9 +9,9 @@ from dbt_semantic_interfaces.references import MeasureReference, MetricReference, TimeDimensionReference from metricflow.errors.errors import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError +from metricflow.model.semantics.linkable_element import ElementPathKey from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty from metricflow.model.semantics.linkable_spec_resolver import ( - ElementPathKey, LinkableElementSet, ValidLinkableSpecResolver, ) diff --git a/metricflow/model/semantics/semantic_model_lookup.py b/metricflow/model/semantics/semantic_model_lookup.py index 84dee39738..4d373f3fab 100644 --- a/metricflow/model/semantics/semantic_model_lookup.py +++ b/metricflow/model/semantics/semantic_model_lookup.py @@ -24,7 +24,7 @@ from metricflow.errors.errors import InvalidSemanticModelError from metricflow.mf_logging.pretty_print import mf_pformat from metricflow.model.semantics.element_group import ElementGrouper -from metricflow.model.semantics.linkable_spec_resolver import ElementPathKey +from metricflow.model.semantics.linkable_element import ElementPathKey from metricflow.model.spec_converters import MeasureConverter from metricflow.specs.specs import ( DimensionSpec, diff --git a/tests/model/semantics/test_linkable_spec_resolver.py b/tests/model/semantics/test_linkable_spec_resolver.py index 26ac6022ac..57afe81d2a 100644 --- a/tests/model/semantics/test_linkable_spec_resolver.py +++ b/tests/model/semantics/test_linkable_spec_resolver.py @@ -12,10 +12,9 @@ ) from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup +from metricflow.model.semantics.linkable_element import SemanticModelJoinPath, SemanticModelJoinPathElement from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty from metricflow.model.semantics.linkable_spec_resolver import ( - SemanticModelJoinPath, - SemanticModelJoinPathElement, ValidLinkableSpecResolver, ) from metricflow.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS From f573ce5c01f6a7b3a4fcd14e3a67ef54213c27a8 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 18 Apr 2024 21:14:29 -0700 Subject: [PATCH 2/3] Move LinkableElementProperty enum into linkable_element module Now that we have a module for enumerations and dataclass type definitions we might as well consolidate. --- metricflow/engine/metricflow_engine.py | 4 +- .../model/semantics/linkable_element.py | 41 +++++++++++++++++- .../semantics/linkable_element_properties.py | 43 ------------------- .../model/semantics/linkable_spec_resolver.py | 2 +- metricflow/model/semantics/metric_lookup.py | 3 +- .../candidate_push_down/push_down_visitor.py | 2 +- .../semantics/test_linkable_spec_resolver.py | 7 ++- tests/model/test_semantic_model_container.py | 2 +- 8 files changed, 51 insertions(+), 53 deletions(-) delete mode 100644 metricflow/model/semantics/linkable_element_properties.py diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index 0552666646..24318affe9 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -33,8 +33,8 @@ from metricflow.mf_logging.formatting import indent from metricflow.mf_logging.pretty_print import mf_pformat from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup -from metricflow.model.semantics.linkable_element import LinkableDimension -from metricflow.model.semantics.linkable_element_properties import ( +from metricflow.model.semantics.linkable_element import ( + LinkableDimension, LinkableElementProperty, ) from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup diff --git a/metricflow/model/semantics/linkable_element.py b/metricflow/model/semantics/linkable_element.py index c4bb093595..650748685e 100644 --- a/metricflow/model/semantics/linkable_element.py +++ b/metricflow/model/semantics/linkable_element.py @@ -1,16 +1,55 @@ from __future__ import annotations from dataclasses import dataclass +from enum import Enum from typing import FrozenSet, Optional, Tuple from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference from dbt_semantic_interfaces.type_enums.date_part import DatePart from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity -from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty from metricflow.specs.specs import EntityReference +class LinkableElementProperty(Enum): + """The properties associated with a valid linkable element. + + Local means an element that is defined within the same semantic model as the measure. This definition is used + throughout the related classes. + """ + + # A local element as per above definition. + LOCAL = "local" + # A local dimension that is prefixed with a local primary entity. + LOCAL_LINKED = "local_linked" + # An element that was joined to the measure semantic model by an entity. + JOINED = "joined" + # An element that was joined to the measure semantic model by joining multiple semantic models. + MULTI_HOP = "multi_hop" + # A time dimension that is a version of a time dimension in a semantic model, but at a different granularity. + DERIVED_TIME_GRANULARITY = "derived_time_granularity" + # Refers to an entity, not a dimension. + ENTITY = "entity" + # See metric_time in DataSet + METRIC_TIME = "metric_time" + # Refers to a metric, not a dimension. + METRIC = "metric" + + @staticmethod + def all_properties() -> FrozenSet[LinkableElementProperty]: # noqa: D102 + return frozenset( + { + LinkableElementProperty.LOCAL, + LinkableElementProperty.LOCAL_LINKED, + LinkableElementProperty.JOINED, + LinkableElementProperty.MULTI_HOP, + LinkableElementProperty.DERIVED_TIME_GRANULARITY, + LinkableElementProperty.METRIC_TIME, + LinkableElementProperty.METRIC, + } + ) + + @dataclass(frozen=True) class ElementPathKey: """A key that can uniquely identify an element and the joins used to realize the element.""" diff --git a/metricflow/model/semantics/linkable_element_properties.py b/metricflow/model/semantics/linkable_element_properties.py deleted file mode 100644 index 668be83974..0000000000 --- a/metricflow/model/semantics/linkable_element_properties.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -from enum import Enum -from typing import FrozenSet - - -class LinkableElementProperty(Enum): - """The properties associated with a valid linkable element. - - Local means an element that is defined within the same semantic model as the measure. This definition is used - throughout the related classes. - """ - - # A local element as per above definition. - LOCAL = "local" - # A local dimension that is prefixed with a local primary entity. - LOCAL_LINKED = "local_linked" - # An element that was joined to the measure semantic model by an entity. - JOINED = "joined" - # An element that was joined to the measure semantic model by joining multiple semantic models. - MULTI_HOP = "multi_hop" - # A time dimension that is a version of a time dimension in a semantic model, but at a different granularity. - DERIVED_TIME_GRANULARITY = "derived_time_granularity" - # Refers to an entity, not a dimension. - ENTITY = "entity" - # See metric_time in DataSet - METRIC_TIME = "metric_time" - # Refers to a metric, not a dimension. - METRIC = "metric" - - @staticmethod - def all_properties() -> FrozenSet[LinkableElementProperty]: # noqa: D102 - return frozenset( - { - LinkableElementProperty.LOCAL, - LinkableElementProperty.LOCAL_LINKED, - LinkableElementProperty.JOINED, - LinkableElementProperty.MULTI_HOP, - LinkableElementProperty.DERIVED_TIME_GRANULARITY, - LinkableElementProperty.METRIC_TIME, - LinkableElementProperty.METRIC, - } - ) diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index a06abb1718..ecd37db5fa 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -26,12 +26,12 @@ from metricflow.model.semantics.linkable_element import ( ElementPathKey, LinkableDimension, + LinkableElementProperty, LinkableEntity, LinkableMetric, SemanticModelJoinPath, SemanticModelJoinPathElement, ) -from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty from metricflow.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator from metricflow.specs.specs import ( DEFAULT_TIME_GRANULARITY, diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index a49f199e58..c4feecaaf8 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -9,8 +9,7 @@ from dbt_semantic_interfaces.references import MeasureReference, MetricReference, TimeDimensionReference from metricflow.errors.errors import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError -from metricflow.model.semantics.linkable_element import ElementPathKey -from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty +from metricflow.model.semantics.linkable_element import ElementPathKey, LinkableElementProperty from metricflow.model.semantics.linkable_spec_resolver import ( LinkableElementSet, ValidLinkableSpecResolver, diff --git a/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py index 72160fd69f..7879e80514 100644 --- a/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -12,7 +12,7 @@ from metricflow.mf_logging.formatting import indent from metricflow.mf_logging.pretty_print import mf_pformat, mf_pformat_many from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup -from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty +from metricflow.model.semantics.linkable_element import LinkableElementProperty from metricflow.query.group_by_item.candidate_push_down.group_by_item_candidate import GroupByItemCandidateSet from metricflow.query.group_by_item.resolution_dag.resolution_nodes.base_node import ( GroupByItemResolutionNode, diff --git a/tests/model/semantics/test_linkable_spec_resolver.py b/tests/model/semantics/test_linkable_spec_resolver.py index 57afe81d2a..b6d26c141a 100644 --- a/tests/model/semantics/test_linkable_spec_resolver.py +++ b/tests/model/semantics/test_linkable_spec_resolver.py @@ -12,8 +12,11 @@ ) from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup -from metricflow.model.semantics.linkable_element import SemanticModelJoinPath, SemanticModelJoinPathElement -from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty +from metricflow.model.semantics.linkable_element import ( + LinkableElementProperty, + SemanticModelJoinPath, + SemanticModelJoinPathElement, +) from metricflow.model.semantics.linkable_spec_resolver import ( ValidLinkableSpecResolver, ) diff --git a/tests/model/test_semantic_model_container.py b/tests/model/test_semantic_model_container.py index 9f22fb3ac0..53b877c603 100644 --- a/tests/model/test_semantic_model_container.py +++ b/tests/model/test_semantic_model_container.py @@ -7,7 +7,7 @@ from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest from dbt_semantic_interfaces.references import EntityReference, MeasureReference, MetricReference -from metricflow.model.semantics.linkable_element_properties import LinkableElementProperty +from metricflow.model.semantics.linkable_element import LinkableElementProperty from metricflow.model.semantics.metric_lookup import MetricLookup from metricflow.model.semantics.semantic_model_lookup import SemanticModelLookup from tests.fixtures.setup_fixtures import MetricFlowTestConfiguration From aaca4b7e3f92ef3b14541cfbf993604841d18694 Mon Sep 17 00:00:00 2001 From: tlento Date: Thu, 18 Apr 2024 21:17:30 -0700 Subject: [PATCH 3/3] Split LinkableElementSet into its own module This is now being returned through methods in the MetricLookup, which means it can be used independently of the LinkableSpecResolver. As such it merits its own module. This will also be useful for the short term need to access this class in the predicate pushdown implementation, and longer term might make it easier to consolidate this class with the LinkableSpecSet. --- .../model/semantics/linkable_element_set.py | 260 ++++++++++++++++++ .../model/semantics/linkable_spec_resolver.py | 253 +---------------- metricflow/model/semantics/metric_lookup.py | 2 +- tests/snapshot_utils.py | 2 +- 4 files changed, 263 insertions(+), 254 deletions(-) create mode 100644 metricflow/model/semantics/linkable_element_set.py diff --git a/metricflow/model/semantics/linkable_element_set.py b/metricflow/model/semantics/linkable_element_set.py new file mode 100644 index 0000000000..e36dda52f7 --- /dev/null +++ b/metricflow/model/semantics/linkable_element_set.py @@ -0,0 +1,260 @@ +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass, field +from typing import Dict, FrozenSet, List, Sequence, Set, Tuple + +from metricflow.model.semantics.linkable_element import ( + ElementPathKey, + LinkableDimension, + LinkableElementProperty, + LinkableEntity, + LinkableMetric, +) +from metricflow.specs.specs import DimensionSpec, EntitySpec, GroupByMetricSpec, LinkableSpecSet, TimeDimensionSpec + + +@dataclass(frozen=True) +class LinkableElementSet: + """Container class for storing all linkable elements for a metric. + + TODO: There are similarities with LinkableSpecSet - consider consolidation. + """ + + # Dictionaries that map the path key to context on the dimension + # + # For example: + # { + # "listing__country_latest": ( + # LinkableDimension( + # element_name="country_latest", + # entity_links=("listing",), + # semantic_model_origin="listings_latest_source", + # ) + # } + path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = field(default_factory=dict) + path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = field(default_factory=dict) + path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = field(default_factory=dict) + + @staticmethod + def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: + """Combine multiple sets together by the path key. + + If there are elements with the same join key, those elements will be categorized as ambiguous. + """ + key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) + key_to_linkable_entities: Dict[ElementPathKey, List[LinkableEntity]] = defaultdict(list) + key_to_linkable_metrics: Dict[ElementPathKey, List[LinkableMetric]] = defaultdict(list) + + for linkable_element_set in linkable_element_sets: + for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): + key_to_linkable_dimensions[path_key].extend(linkable_dimensions) + for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): + key_to_linkable_entities[path_key].extend(linkable_entities) + for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): + key_to_linkable_metrics[path_key].extend(linkable_metrics) + + # Convert the dictionaries to use tuples instead of lists. + return LinkableElementSet( + path_key_to_linkable_dimensions={ + path_key: tuple(dimensions) for path_key, dimensions in key_to_linkable_dimensions.items() + }, + path_key_to_linkable_entities={ + path_key: tuple(entities) for path_key, entities in key_to_linkable_entities.items() + }, + path_key_to_linkable_metrics={ + path_key: tuple(metrics) for path_key, metrics in key_to_linkable_metrics.items() + }, + ) + + @staticmethod + def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: + """Find the intersection of all elements in the sets by path key. + + This is useful to figure out the common dimensions that are possible to query with multiple metrics. You would + find the LinkableSpecSet for each metric in the query, then do an intersection of the sets. + """ + if len(linkable_element_sets) == 0: + return LinkableElementSet() + + # Find path keys that are common to all LinkableElementSets. + dimension_path_keys: List[Set[ElementPathKey]] = [] + entity_path_keys: List[Set[ElementPathKey]] = [] + metric_path_keys: List[Set[ElementPathKey]] = [] + for linkable_element_set in linkable_element_sets: + dimension_path_keys.append(set(linkable_element_set.path_key_to_linkable_dimensions.keys())) + entity_path_keys.append(set(linkable_element_set.path_key_to_linkable_entities.keys())) + metric_path_keys.append(set(linkable_element_set.path_key_to_linkable_metrics.keys())) + common_linkable_dimension_path_keys = set.intersection(*dimension_path_keys) if dimension_path_keys else set() + common_linkable_entity_path_keys = set.intersection(*entity_path_keys) if entity_path_keys else set() + common_linkable_metric_path_keys = set.intersection(*metric_path_keys) if metric_path_keys else set() + + # Create a new LinkableElementSet that only includes items where the path key is common to all sets. + join_path_to_linkable_dimensions: Dict[ElementPathKey, Set[LinkableDimension]] = defaultdict(set) + join_path_to_linkable_entities: Dict[ElementPathKey, Set[LinkableEntity]] = defaultdict(set) + join_path_to_linkable_metrics: Dict[ElementPathKey, Set[LinkableMetric]] = defaultdict(set) + + for linkable_element_set in linkable_element_sets: + for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): + if path_key in common_linkable_dimension_path_keys: + join_path_to_linkable_dimensions[path_key].update(linkable_dimensions) + for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): + if path_key in common_linkable_entity_path_keys: + join_path_to_linkable_entities[path_key].update(linkable_entities) + for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): + if path_key in common_linkable_metric_path_keys: + join_path_to_linkable_metrics[path_key].update(linkable_metrics) + + return LinkableElementSet( + path_key_to_linkable_dimensions={ + path_key: tuple( + sorted( + dimensions, + key=lambda linkable_dimension: ( + linkable_dimension.semantic_model_origin.semantic_model_name + if linkable_dimension.semantic_model_origin + else "" + ), + ) + ) + for path_key, dimensions in join_path_to_linkable_dimensions.items() + }, + path_key_to_linkable_entities={ + path_key: tuple( + sorted( + entities, key=lambda linkable_entity: linkable_entity.semantic_model_origin.semantic_model_name + ) + ) + for path_key, entities in join_path_to_linkable_entities.items() + }, + path_key_to_linkable_metrics={ + path_key: tuple( + sorted( + metrics, key=lambda linkable_metric: linkable_metric.join_by_semantic_model.semantic_model_name + ) + ) + for path_key, metrics in join_path_to_linkable_metrics.items() + }, + ) + + def filter( + self, + with_any_of: FrozenSet[LinkableElementProperty], + without_any_of: FrozenSet[LinkableElementProperty] = frozenset(), + without_all_of: FrozenSet[LinkableElementProperty] = frozenset(), + ) -> LinkableElementSet: + """Filter elements in the set. + + First, only elements with at least one property in the "with_any_of" set are retained. Then, any elements with + a property in "without_any_of" set are removed. Lastly, any elements with all properties in without_all_of + are removed. + """ + key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {} + key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {} + key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {} + + for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items(): + filtered_linkable_dimensions = tuple( + linkable_dimension + for linkable_dimension in linkable_dimensions + if len(linkable_dimension.properties.intersection(with_any_of)) > 0 + and len(linkable_dimension.properties.intersection(without_any_of)) == 0 + and ( + len(without_all_of) == 0 + or linkable_dimension.properties.intersection(without_all_of) != without_all_of + ) + ) + if len(filtered_linkable_dimensions) > 0: + key_to_linkable_dimensions[path_key] = filtered_linkable_dimensions + + for path_key, linkable_entities in self.path_key_to_linkable_entities.items(): + filtered_linkable_entities = tuple( + linkable_entity + for linkable_entity in linkable_entities + if len(linkable_entity.properties.intersection(with_any_of)) > 0 + and len(linkable_entity.properties.intersection(without_any_of)) == 0 + and ( + len(without_all_of) == 0 + or linkable_entity.properties.intersection(without_all_of) != without_all_of + ) + ) + if len(filtered_linkable_entities) > 0: + key_to_linkable_entities[path_key] = filtered_linkable_entities + + for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items(): + filtered_linkable_metrics = tuple( + linkable_metric + for linkable_metric in linkable_metrics + if len(linkable_metric.properties.intersection(with_any_of)) > 0 + and len(linkable_metric.properties.intersection(without_any_of)) == 0 + and ( + len(without_all_of) == 0 + or linkable_metric.properties.intersection(without_all_of) != without_all_of + ) + ) + if len(filtered_linkable_metrics) > 0: + key_to_linkable_metrics[path_key] = filtered_linkable_metrics + + return LinkableElementSet( + path_key_to_linkable_dimensions=key_to_linkable_dimensions, + path_key_to_linkable_entities=key_to_linkable_entities, + path_key_to_linkable_metrics=key_to_linkable_metrics, + ) + + @property + def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 + return LinkableSpecSet( + dimension_specs=tuple( + DimensionSpec( + element_name=path_key.element_name, + entity_links=path_key.entity_links, + ) + for path_key in self.path_key_to_linkable_dimensions.keys() + if not path_key.time_granularity + ), + time_dimension_specs=tuple( + TimeDimensionSpec( + element_name=path_key.element_name, + entity_links=path_key.entity_links, + time_granularity=path_key.time_granularity, + date_part=path_key.date_part, + ) + for path_key in self.path_key_to_linkable_dimensions.keys() + if path_key.time_granularity + ), + entity_specs=tuple( + EntitySpec( + element_name=path_key.element_name, + entity_links=path_key.entity_links, + ) + for path_key in self.path_key_to_linkable_entities + ), + group_by_metric_specs=tuple( + GroupByMetricSpec( + element_name=path_key.element_name, + entity_links=path_key.entity_links, + ) + for path_key in self.path_key_to_linkable_metrics + ), + ) + + @property + def only_unique_path_keys(self) -> LinkableElementSet: + """Returns a set that only includes path keys that map to a single element.""" + return LinkableElementSet( + path_key_to_linkable_dimensions={ + path_key: linkable_dimensions + for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items() + if len(linkable_dimensions) <= 1 + }, + path_key_to_linkable_entities={ + path_key: linkable_entities + for path_key, linkable_entities in self.path_key_to_linkable_entities.items() + if len(linkable_entities) <= 1 + }, + path_key_to_linkable_metrics={ + path_key: linkable_metrics + for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items() + if len(linkable_metrics) <= 1 + }, + ) diff --git a/metricflow/model/semantics/linkable_spec_resolver.py b/metricflow/model/semantics/linkable_spec_resolver.py index ecd37db5fa..767990fa8c 100644 --- a/metricflow/model/semantics/linkable_spec_resolver.py +++ b/metricflow/model/semantics/linkable_spec_resolver.py @@ -3,7 +3,6 @@ import logging import time from collections import defaultdict -from dataclasses import dataclass, field from typing import TYPE_CHECKING, Dict, FrozenSet, List, Optional, Sequence, Set, Tuple from dbt_semantic_interfaces.enum_extension import assert_values_exhausted @@ -32,15 +31,11 @@ SemanticModelJoinPath, SemanticModelJoinPathElement, ) +from metricflow.model.semantics.linkable_element_set import LinkableElementSet from metricflow.model.semantics.semantic_model_join_evaluator import SemanticModelJoinEvaluator from metricflow.specs.specs import ( DEFAULT_TIME_GRANULARITY, - DimensionSpec, EntityReference, - EntitySpec, - GroupByMetricSpec, - LinkableSpecSet, - TimeDimensionSpec, ) if TYPE_CHECKING: @@ -50,252 +45,6 @@ logger = logging.getLogger(__name__) -@dataclass(frozen=True) -class LinkableElementSet: - """Container class for storing all linkable elements for a metric. - - TODO: There are similarities with LinkableSpecSet - consider consolidation. - """ - - # Dictionaries that map the path key to context on the dimension - # - # For example: - # { - # "listing__country_latest": ( - # LinkableDimension( - # element_name="country_latest", - # entity_links=("listing",), - # semantic_model_origin="listings_latest_source", - # ) - # } - path_key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = field(default_factory=dict) - path_key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = field(default_factory=dict) - path_key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = field(default_factory=dict) - - @staticmethod - def merge_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: - """Combine multiple sets together by the path key. - - If there are elements with the same join key, those elements will be categorized as ambiguous. - """ - key_to_linkable_dimensions: Dict[ElementPathKey, List[LinkableDimension]] = defaultdict(list) - key_to_linkable_entities: Dict[ElementPathKey, List[LinkableEntity]] = defaultdict(list) - key_to_linkable_metrics: Dict[ElementPathKey, List[LinkableMetric]] = defaultdict(list) - - for linkable_element_set in linkable_element_sets: - for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): - key_to_linkable_dimensions[path_key].extend(linkable_dimensions) - for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): - key_to_linkable_entities[path_key].extend(linkable_entities) - for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): - key_to_linkable_metrics[path_key].extend(linkable_metrics) - - # Convert the dictionaries to use tuples instead of lists. - return LinkableElementSet( - path_key_to_linkable_dimensions={ - path_key: tuple(dimensions) for path_key, dimensions in key_to_linkable_dimensions.items() - }, - path_key_to_linkable_entities={ - path_key: tuple(entities) for path_key, entities in key_to_linkable_entities.items() - }, - path_key_to_linkable_metrics={ - path_key: tuple(metrics) for path_key, metrics in key_to_linkable_metrics.items() - }, - ) - - @staticmethod - def intersection_by_path_key(linkable_element_sets: Sequence[LinkableElementSet]) -> LinkableElementSet: - """Find the intersection of all elements in the sets by path key. - - This is useful to figure out the common dimensions that are possible to query with multiple metrics. You would - find the LinkableSpecSet for each metric in the query, then do an intersection of the sets. - """ - if len(linkable_element_sets) == 0: - return LinkableElementSet() - - # Find path keys that are common to all LinkableElementSets. - dimension_path_keys: List[Set[ElementPathKey]] = [] - entity_path_keys: List[Set[ElementPathKey]] = [] - metric_path_keys: List[Set[ElementPathKey]] = [] - for linkable_element_set in linkable_element_sets: - dimension_path_keys.append(set(linkable_element_set.path_key_to_linkable_dimensions.keys())) - entity_path_keys.append(set(linkable_element_set.path_key_to_linkable_entities.keys())) - metric_path_keys.append(set(linkable_element_set.path_key_to_linkable_metrics.keys())) - common_linkable_dimension_path_keys = set.intersection(*dimension_path_keys) if dimension_path_keys else set() - common_linkable_entity_path_keys = set.intersection(*entity_path_keys) if entity_path_keys else set() - common_linkable_metric_path_keys = set.intersection(*metric_path_keys) if metric_path_keys else set() - - # Create a new LinkableElementSet that only includes items where the path key is common to all sets. - join_path_to_linkable_dimensions: Dict[ElementPathKey, Set[LinkableDimension]] = defaultdict(set) - join_path_to_linkable_entities: Dict[ElementPathKey, Set[LinkableEntity]] = defaultdict(set) - join_path_to_linkable_metrics: Dict[ElementPathKey, Set[LinkableMetric]] = defaultdict(set) - - for linkable_element_set in linkable_element_sets: - for path_key, linkable_dimensions in linkable_element_set.path_key_to_linkable_dimensions.items(): - if path_key in common_linkable_dimension_path_keys: - join_path_to_linkable_dimensions[path_key].update(linkable_dimensions) - for path_key, linkable_entities in linkable_element_set.path_key_to_linkable_entities.items(): - if path_key in common_linkable_entity_path_keys: - join_path_to_linkable_entities[path_key].update(linkable_entities) - for path_key, linkable_metrics in linkable_element_set.path_key_to_linkable_metrics.items(): - if path_key in common_linkable_metric_path_keys: - join_path_to_linkable_metrics[path_key].update(linkable_metrics) - - return LinkableElementSet( - path_key_to_linkable_dimensions={ - path_key: tuple( - sorted( - dimensions, - key=lambda linkable_dimension: ( - linkable_dimension.semantic_model_origin.semantic_model_name - if linkable_dimension.semantic_model_origin - else "" - ), - ) - ) - for path_key, dimensions in join_path_to_linkable_dimensions.items() - }, - path_key_to_linkable_entities={ - path_key: tuple( - sorted( - entities, key=lambda linkable_entity: linkable_entity.semantic_model_origin.semantic_model_name - ) - ) - for path_key, entities in join_path_to_linkable_entities.items() - }, - path_key_to_linkable_metrics={ - path_key: tuple( - sorted( - metrics, key=lambda linkable_metric: linkable_metric.join_by_semantic_model.semantic_model_name - ) - ) - for path_key, metrics in join_path_to_linkable_metrics.items() - }, - ) - - def filter( - self, - with_any_of: FrozenSet[LinkableElementProperty], - without_any_of: FrozenSet[LinkableElementProperty] = frozenset(), - without_all_of: FrozenSet[LinkableElementProperty] = frozenset(), - ) -> LinkableElementSet: - """Filter elements in the set. - - First, only elements with at least one property in the "with_any_of" set are retained. Then, any elements with - a property in "without_any_of" set are removed. Lastly, any elements with all properties in without_all_of - are removed. - """ - key_to_linkable_dimensions: Dict[ElementPathKey, Tuple[LinkableDimension, ...]] = {} - key_to_linkable_entities: Dict[ElementPathKey, Tuple[LinkableEntity, ...]] = {} - key_to_linkable_metrics: Dict[ElementPathKey, Tuple[LinkableMetric, ...]] = {} - - for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items(): - filtered_linkable_dimensions = tuple( - linkable_dimension - for linkable_dimension in linkable_dimensions - if len(linkable_dimension.properties.intersection(with_any_of)) > 0 - and len(linkable_dimension.properties.intersection(without_any_of)) == 0 - and ( - len(without_all_of) == 0 - or linkable_dimension.properties.intersection(without_all_of) != without_all_of - ) - ) - if len(filtered_linkable_dimensions) > 0: - key_to_linkable_dimensions[path_key] = filtered_linkable_dimensions - - for path_key, linkable_entities in self.path_key_to_linkable_entities.items(): - filtered_linkable_entities = tuple( - linkable_entity - for linkable_entity in linkable_entities - if len(linkable_entity.properties.intersection(with_any_of)) > 0 - and len(linkable_entity.properties.intersection(without_any_of)) == 0 - and ( - len(without_all_of) == 0 - or linkable_entity.properties.intersection(without_all_of) != without_all_of - ) - ) - if len(filtered_linkable_entities) > 0: - key_to_linkable_entities[path_key] = filtered_linkable_entities - - for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items(): - filtered_linkable_metrics = tuple( - linkable_metric - for linkable_metric in linkable_metrics - if len(linkable_metric.properties.intersection(with_any_of)) > 0 - and len(linkable_metric.properties.intersection(without_any_of)) == 0 - and ( - len(without_all_of) == 0 - or linkable_metric.properties.intersection(without_all_of) != without_all_of - ) - ) - if len(filtered_linkable_metrics) > 0: - key_to_linkable_metrics[path_key] = filtered_linkable_metrics - - return LinkableElementSet( - path_key_to_linkable_dimensions=key_to_linkable_dimensions, - path_key_to_linkable_entities=key_to_linkable_entities, - path_key_to_linkable_metrics=key_to_linkable_metrics, - ) - - @property - def as_spec_set(self) -> LinkableSpecSet: # noqa: D102 - return LinkableSpecSet( - dimension_specs=tuple( - DimensionSpec( - element_name=path_key.element_name, - entity_links=path_key.entity_links, - ) - for path_key in self.path_key_to_linkable_dimensions.keys() - if not path_key.time_granularity - ), - time_dimension_specs=tuple( - TimeDimensionSpec( - element_name=path_key.element_name, - entity_links=path_key.entity_links, - time_granularity=path_key.time_granularity, - date_part=path_key.date_part, - ) - for path_key in self.path_key_to_linkable_dimensions.keys() - if path_key.time_granularity - ), - entity_specs=tuple( - EntitySpec( - element_name=path_key.element_name, - entity_links=path_key.entity_links, - ) - for path_key in self.path_key_to_linkable_entities - ), - group_by_metric_specs=tuple( - GroupByMetricSpec( - element_name=path_key.element_name, - entity_links=path_key.entity_links, - ) - for path_key in self.path_key_to_linkable_metrics - ), - ) - - @property - def only_unique_path_keys(self) -> LinkableElementSet: - """Returns a set that only includes path keys that map to a single element.""" - return LinkableElementSet( - path_key_to_linkable_dimensions={ - path_key: linkable_dimensions - for path_key, linkable_dimensions in self.path_key_to_linkable_dimensions.items() - if len(linkable_dimensions) <= 1 - }, - path_key_to_linkable_entities={ - path_key: linkable_entities - for path_key, linkable_entities in self.path_key_to_linkable_entities.items() - if len(linkable_entities) <= 1 - }, - path_key_to_linkable_metrics={ - path_key: linkable_metrics - for path_key, linkable_metrics in self.path_key_to_linkable_metrics.items() - if len(linkable_metrics) <= 1 - }, - ) - - def _generate_linkable_time_dimensions( semantic_model_origin: SemanticModelReference, dimension: Dimension, diff --git a/metricflow/model/semantics/metric_lookup.py b/metricflow/model/semantics/metric_lookup.py index c4feecaaf8..1e57a42d51 100644 --- a/metricflow/model/semantics/metric_lookup.py +++ b/metricflow/model/semantics/metric_lookup.py @@ -10,8 +10,8 @@ from metricflow.errors.errors import DuplicateMetricError, MetricNotFoundError, NonExistentMeasureError from metricflow.model.semantics.linkable_element import ElementPathKey, LinkableElementProperty +from metricflow.model.semantics.linkable_element_set import LinkableElementSet from metricflow.model.semantics.linkable_spec_resolver import ( - LinkableElementSet, ValidLinkableSpecResolver, ) from metricflow.model.semantics.semantic_model_join_evaluator import MAX_JOIN_HOPS diff --git a/tests/snapshot_utils.py b/tests/snapshot_utils.py index d6b7771c8a..d0e230c61d 100644 --- a/tests/snapshot_utils.py +++ b/tests/snapshot_utils.py @@ -11,7 +11,7 @@ from metricflow.dataflow.dataflow_plan import DataflowPlan from metricflow.execution.execution_plan import ExecutionPlan from metricflow.mf_logging.pretty_print import mf_pformat -from metricflow.model.semantics.linkable_spec_resolver import LinkableElementSet +from metricflow.model.semantics.linkable_element_set import LinkableElementSet from metricflow.naming.object_builder_scheme import ObjectBuilderNamingScheme from metricflow.protocols.sql_client import SqlClient, SqlEngine from metricflow.specs.specs import InstanceSpecSet, LinkableSpecSet