-
Notifications
You must be signed in to change notification settings - Fork 96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add a Resolver for Saved-Query Dependencies #1152
Changes from 12 commits
9d73ca5
0fa7db7
c270b8b
4367174
796b367
0673c22
9cb1650
5ed211e
847a4d2
d31029e
76a11f9
4a19e0f
bcbf1bf
07761a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
kind: Features | ||
body: Add a Dependency Resolver for Saved Queries | ||
time: 2024-04-26T14:41:19.27946-07:00 | ||
custom: | ||
Author: plypaul | ||
Issue: "1155" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
from __future__ import annotations | ||
|
||
import logging | ||
from dataclasses import dataclass | ||
from typing import Tuple | ||
|
||
from dbt_semantic_interfaces.protocols import SemanticManifest | ||
from dbt_semantic_interfaces.references import ( | ||
SemanticModelReference, | ||
) | ||
|
||
from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup | ||
from metricflow_semantics.query.query_parser import MetricFlowQueryParser | ||
from metricflow_semantics.specs.query_param_implementations import SavedQueryParameter | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@dataclass(frozen=True)
class SavedQueryDependencySet:
    """Describes what a saved query depends on.

    This is mainly used when creating the cache item associated with a saved query: everything listed in this class
    must be up-to-date before that cache item can be created. Otherwise, running the export / creating the cache may
    produce a cache item that is out-of-date / unusable.
    """

    # References to the semantic models that the saved query depends on.
    semantic_model_references: Tuple[SemanticModelReference, ...]
|
||
|
||
class SavedQueryDependencyResolver:
    """Figures out which semantic models a saved query depends on.

    Results are reported as a `SavedQueryDependencySet`.
    """

    def __init__(self, semantic_manifest: SemanticManifest) -> None:  # noqa: D107
        self._semantic_manifest = semantic_manifest
        self._query_parser = MetricFlowQueryParser(SemanticManifestLookup(semantic_manifest))

    @staticmethod
    def _to_dependency_set(references: Tuple[SemanticModelReference, ...]) -> SavedQueryDependencySet:
        """Wrap the given references in a dependency set, ordered by semantic model name."""
        ordered_references = list(references)
        ordered_references.sort(key=lambda model_reference: model_reference.semantic_model_name)
        return SavedQueryDependencySet(semantic_model_references=tuple(ordered_references))

    def _resolve_dependencies(self, saved_query_name: str) -> SavedQueryDependencySet:
        """Parse the named saved query and collect the semantic models that the parse result reports as queried.

        No additional filter, limit, time constraint, or ordering is supplied to the parser. Any exception raised
        while parsing propagates to the caller.
        """
        saved_query_parameter = SavedQueryParameter(saved_query_name)
        parse_result = self._query_parser.parse_and_validate_saved_query(
            saved_query_parameter=saved_query_parameter,
            where_filter=None,
            limit=None,
            time_constraint_start=None,
            time_constraint_end=None,
            order_by_names=None,
            order_by_parameters=None,
        )
        return self._to_dependency_set(tuple(parse_result.queried_semantic_models))

    def resolve_dependencies(self, saved_query_name: str) -> SavedQueryDependencySet:
        """Return the dependencies of the given saved query in the manifest.

        If resolution fails with any exception, the exception is logged and every semantic model in the manifest is
        returned as a dependency instead.
        """
        try:
            return self._resolve_dependencies(saved_query_name)
        except Exception:
            # Fail-safe: depending on all semantic models is the conservative answer.
            logger.exception(
                f"Got an exception while getting the dependencies of saved-query {repr(saved_query_name)}. "
                f"All semantic models will be returned instead for safety."
            )
            every_reference = tuple(
                semantic_model.reference for semantic_model in self._semantic_manifest.semantic_models
            )
            return self._to_dependency_set(every_reference)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Dict, Iterable, Tuple, TypeVar | ||
|
||
IterableT = TypeVar("IterableT") | ||
|
||
|
||
def ordered_dedupe(*iterables: Iterable[IterableT]) -> Tuple[IterableT, ...]:
    """Flatten the iterables into one tuple of unique items, keeping each item at its first-seen position.

    Items must be hashable, since a dict is used to track what has been seen.
    """
    # A dict keeps insertion order, so its keys are the unique items in the order they were first encountered.
    first_seen = dict.fromkeys(item for iterable in iterables for item in iterable)
    return tuple(first_seen)
Comment on lines
+8
to
+15
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a less robust custom implementation of the (admittedly poorly named) `unique_everseen` function in more-itertools. Since dbt-semantic-interfaces depends on more-itertools, we should add the dependency with the same version range and use it here as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually I just noticed this takes […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, we can make that change, but let me check about the "minimize dependencies" request and check how that method works with types. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I already looked at the dependencies; that's not an issue. The typing is another matter: more-itertools is an old library and these methods date back a long way. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from __future__ import annotations | ||
|
||
from enum import Enum | ||
from typing import FrozenSet | ||
|
||
|
||
class LinkableElementProperty(Enum):
    """Properties that can be associated with a valid linkable element.

    "Local" describes an element defined within the same semantic model as the measure. The related classes use the
    term with that meaning throughout.
    """

    # A local element, as defined in the class docstring.
    LOCAL = "local"
    # A local dimension that is prefixed with a local primary entity.
    LOCAL_LINKED = "local_linked"
    # An element that was joined to the measure semantic model by an entity.
    JOINED = "joined"
    # An element that was joined to the measure semantic model by joining multiple semantic models.
    MULTI_HOP = "multi_hop"
    # A time dimension that is a version of a time dimension in a semantic model, but at a different granularity.
    DERIVED_TIME_GRANULARITY = "derived_time_granularity"
    # Refers to an entity, not a dimension.
    ENTITY = "entity"
    # See metric_time in DataSet
    METRIC_TIME = "metric_time"
    # Refers to a metric, not a dimension.
    METRIC = "metric"

    @staticmethod
    def all_properties() -> FrozenSet[LinkableElementProperty]:
        """Return every property except `ENTITY` as a frozen set.

        NOTE(review): `ENTITY` is not part of the returned set even though the method is named `all_properties` -
        confirm that the exclusion is intended.
        """
        return frozenset(LinkableElementProperty) - {LinkableElementProperty.ENTITY}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from __future__ import annotations | ||
|
||
from abc import ABC, abstractmethod | ||
from typing import Sequence | ||
|
||
from dbt_semantic_interfaces.references import SemanticModelReference | ||
|
||
|
||
class SemanticModelDerivation(ABC):
    """Interface implemented by objects that can be described as derived from one or more semantic models."""

    @property
    @abstractmethod
    def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
        """References to the semantic models that this object was derived from.

        Implementations should return an ordered sequence that contains no duplicates.
        """
        raise NotImplementedError
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,27 @@ | ||
from __future__ import annotations | ||
|
||
import logging | ||
from abc import ABC | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from typing import FrozenSet, Optional, Tuple | ||
from typing import FrozenSet, Optional, Sequence, Tuple | ||
|
||
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted | ||
from dbt_semantic_interfaces.protocols.dimension import DimensionType | ||
from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference | ||
from dbt_semantic_interfaces.references import ( | ||
DimensionReference, | ||
EntityReference, | ||
MetricReference, | ||
SemanticModelReference, | ||
) | ||
from dbt_semantic_interfaces.type_enums.date_part import DatePart | ||
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity | ||
from typing_extensions import override | ||
|
||
from metricflow_semantics.specs.spec_classes import EntityReference | ||
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty | ||
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class LinkableElementType(Enum): | ||
|
@@ -40,45 +51,6 @@ def is_dimension_type(self) -> bool: | |
return assert_values_exhausted(element_type) | ||
|
||
|
||
class LinkableElementProperty(Enum):
    """Properties that can be associated with a valid linkable element.

    "Local" describes an element defined within the same semantic model as the measure. The related classes use the
    term with that meaning throughout.
    """

    # A local element, as defined in the class docstring.
    LOCAL = "local"
    # A local dimension that is prefixed with a local primary entity.
    LOCAL_LINKED = "local_linked"
    # An element that was joined to the measure semantic model by an entity.
    JOINED = "joined"
    # An element that was joined to the measure semantic model by joining multiple semantic models.
    MULTI_HOP = "multi_hop"
    # A time dimension that is a version of a time dimension in a semantic model, but at a different granularity.
    DERIVED_TIME_GRANULARITY = "derived_time_granularity"
    # Refers to an entity, not a dimension.
    ENTITY = "entity"
    # See metric_time in DataSet
    METRIC_TIME = "metric_time"
    # Refers to a metric, not a dimension.
    METRIC = "metric"

    @staticmethod
    def all_properties() -> FrozenSet[LinkableElementProperty]:
        """Return every property except `ENTITY` as a frozen set.

        NOTE(review): `ENTITY` is not part of the returned set even though the method is named `all_properties` -
        confirm that the exclusion is intended.
        """
        return frozenset(LinkableElementProperty) - {LinkableElementProperty.ENTITY}
|
||
|
||
@dataclass(frozen=True) | ||
class ElementPathKey: | ||
"""A key that can uniquely identify an element and the joins used to realize the element.""" | ||
|
@@ -110,8 +82,14 @@ class SemanticModelJoinPathElement: | |
join_on_entity: EntityReference | ||
|
||
|
||
class LinkableElement(SemanticModelDerivation, ABC):
    """Abstract base for an entity / dimension that may have been joined by entities.

    Declares no members of its own beyond what `SemanticModelDerivation` requires.
    """
|
||
|
||
@dataclass(frozen=True) | ||
class LinkableDimension: | ||
class LinkableDimension(LinkableElement): | ||
"""Describes how a dimension can be realized by joining based on entity links.""" | ||
|
||
# The semantic model where this dimension was defined. | ||
|
@@ -143,9 +121,20 @@ def path_key(self) -> ElementPathKey: # noqa: D102 | |
def reference(self) -> DimensionReference: # noqa: D102 | ||
return DimensionReference(element_name=self.element_name) | ||
|
||
@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
    """The origin semantic model (when set) plus every model on the join path, de-duplicated and sorted by name."""
    origin = self.semantic_model_origin
    # The origin is only included when it is truthy.
    references = {origin} if origin else set()
    references.update(path_element.semantic_model_reference for path_element in self.join_path)

    ordered_references = list(references)
    ordered_references.sort(key=lambda model_reference: model_reference.semantic_model_name)
    return ordered_references
|
||
|
||
@dataclass(frozen=True) | ||
class LinkableEntity: | ||
class LinkableEntity(LinkableElement, SemanticModelDerivation): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't need both of these, right? Just `LinkableElement`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right - updated. |
||
"""Describes how an entity can be realized by joining based on entity links.""" | ||
|
||
# The semantic model where this entity was defined. | ||
|
@@ -165,9 +154,18 @@ def path_key(self) -> ElementPathKey: # noqa: D102 | |
def reference(self) -> EntityReference: # noqa: D102 | ||
return EntityReference(element_name=self.element_name) | ||
|
||
@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
    """The origin semantic model plus every model on the join path, de-duplicated and sorted by name."""
    references = {self.semantic_model_origin}
    references.update(path_element.semantic_model_reference for path_element in self.join_path)

    ordered_references = list(references)
    ordered_references.sort(key=lambda model_reference: model_reference.semantic_model_name)
    return ordered_references
|
||
|
||
@dataclass(frozen=True) | ||
class LinkableMetric: | ||
class LinkableMetric(LinkableElement, SemanticModelDerivation): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same thing here - just `LinkableElement`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
"""Describes how a metric can be realized by joining based on entity links.""" | ||
|
||
element_name: str | ||
|
@@ -187,6 +185,15 @@ def path_key(self) -> ElementPathKey: # noqa: D102 | |
def reference(self) -> MetricReference: # noqa: D102 | ||
return MetricReference(element_name=self.element_name) | ||
|
||
@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
    """The `join_by_semantic_model` plus every model on the join path, de-duplicated and sorted by name."""
    references = {self.join_by_semantic_model}
    references.update(path_element.semantic_model_reference for path_element in self.join_path)

    ordered_references = list(references)
    ordered_references.sort(key=lambda model_reference: model_reference.semantic_model_name)
    return ordered_references
|
||
|
||
@dataclass(frozen=True) | ||
class SemanticModelJoinPath: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. We might need to do something about this log level for deployment - if it turns out to be common, we'll want it to show up at level WARN or INFO instead of ERROR (which is what `logger.exception` emits) - but I think we can do that via Datadog instead of here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a fail-safe and shouldn't really be hit - if it is, it means there's a bug that we need to fix.