Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Resolver for Saved-Query Dependencies #1152

Merged
merged 14 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240426-144119.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Add a Dependency Resolver for Saved Queries
time: 2024-04-26T14:41:19.27946-07:00
custom:
Author: plypaul
Issue: "1155"
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Tuple

from dbt_semantic_interfaces.protocols import SemanticManifest
from dbt_semantic_interfaces.references import (
SemanticModelReference,
)

from metricflow_semantics.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow_semantics.query.query_parser import MetricFlowQueryParser
from metricflow_semantics.specs.query_param_implementations import SavedQueryParameter

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class SavedQueryDependencySet:
    """The set of objects that a saved query depends on.

    Primarily used when creating the cache item associated with a saved query: every dependency
    listed here must be up-to-date before that cache item is built. Otherwise, running the export /
    creating the cache may produce a cache item that is out-of-date / unusable.
    """

    # References to the semantic models that the saved query reads from.
    semantic_model_references: Tuple[SemanticModelReference, ...]


class SavedQueryDependencyResolver:
    """Resolves the dependencies of a saved query. Also see `SavedQueryDependencySet`."""

    def __init__(self, semantic_manifest: SemanticManifest) -> None:  # noqa: D107
        self._semantic_manifest = semantic_manifest
        self._query_parser = MetricFlowQueryParser(SemanticManifestLookup(semantic_manifest))

    def _resolve_dependencies(self, saved_query_name: str) -> SavedQueryDependencySet:
        # Parse the saved query with no additional constraints; the parse result records the
        # semantic models that the query reads from.
        parse_result = self._query_parser.parse_and_validate_saved_query(
            saved_query_parameter=SavedQueryParameter(saved_query_name),
            where_filter=None,
            limit=None,
            time_constraint_start=None,
            time_constraint_end=None,
            order_by_names=None,
            order_by_parameters=None,
        )

        # Sort by model name so the result is deterministic.
        ordered_references = sorted(
            parse_result.queried_semantic_models,
            key=lambda reference: reference.semantic_model_name,
        )
        return SavedQueryDependencySet(semantic_model_references=tuple(ordered_references))

    def resolve_dependencies(self, saved_query_name: str) -> SavedQueryDependencySet:
        """Return the dependencies of the given saved query in the manifest."""
        try:
            return self._resolve_dependencies(saved_query_name)
        except Exception:
            # Fail-safe: this path shouldn't normally be hit. If it is, fall back to reporting
            # every semantic model as a dependency so callers err on the side of freshness.
            logger.exception(
                f"Got an exception while getting the dependencies of saved-query {repr(saved_query_name)}. "
                f"All semantic models will be returned instead for safety."
            )
            all_references = sorted(
                (semantic_model.reference for semantic_model in self._semantic_manifest.semantic_models),
                key=lambda reference: reference.semantic_model_name,
            )
            return SavedQueryDependencySet(semantic_model_references=tuple(all_references))
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations

from typing import Dict, Iterable, Tuple, TypeVar

IterableT = TypeVar("IterableT")


def ordered_dedupe(*iterables: Iterable[IterableT]) -> Tuple[IterableT, ...]:
    """De-duplicates the items in the iterables while preserving the order.

    Items from earlier iterables (and earlier positions within an iterable) win: only the first
    occurrence of each item is kept. Relies on dict preserving insertion order.
    """
    flattened = (item for single_iterable in iterables for item in single_iterable)
    return tuple(dict.fromkeys(flattened))
Comment on lines +8 to +15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a less robust custom implementation of the (admittedly poorly named) unique_everseen function in more-itertools.

Since dbt-semantic-interfaces depends on more-itertools we should add the dependency with the same version range and use it here as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I just noticed this takes *iterables, so it's a less robust implementation of unique_everseen(itertools.chain(iterables)). The point still stands, we should use the libraries, especially where iterator collection helpers are concerned.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we can make that change, but let me check about the "minimize dependencies" request and check how that method works with types.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I already looked at the dependencies, that's not an issue. The typing is another matter, more-itertools is an old library and these methods date back a long way.

2 changes: 1 addition & 1 deletion metricflow-semantics/metricflow_semantics/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
EntitySpec,
GroupByMetricSpec,
InstanceSpec,
InstanceSpecSet,
MeasureSpec,
MetadataSpec,
MetricSpec,
TimeDimensionSpec,
)
from metricflow_semantics.specs.spec_set import InstanceSpecSet

# Type for the specification used in the instance.
SpecT = TypeVar("SpecT", bound=InstanceSpec)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from __future__ import annotations

from enum import Enum
from typing import FrozenSet


class LinkableElementProperty(Enum):
    """The properties associated with a valid linkable element.

    "Local" means an element defined within the same semantic model as the measure; that definition
    is used throughout the related classes.
    """

    # A local element as per above definition.
    LOCAL = "local"
    # A local dimension that is prefixed with a local primary entity.
    LOCAL_LINKED = "local_linked"
    # An element that was joined to the measure semantic model by an entity.
    JOINED = "joined"
    # An element that was joined to the measure semantic model by joining multiple semantic models.
    MULTI_HOP = "multi_hop"
    # A time dimension that is a version of a time dimension in a semantic model, but at a different granularity.
    DERIVED_TIME_GRANULARITY = "derived_time_granularity"
    # Refers to an entity, not a dimension.
    ENTITY = "entity"
    # See metric_time in DataSet
    METRIC_TIME = "metric_time"
    # Refers to a metric, not a dimension.
    METRIC = "metric"

    @staticmethod
    def all_properties() -> FrozenSet[LinkableElementProperty]:  # noqa: D102
        # NOTE(review): ENTITY is excluded from "all" properties — presumably intentional, but
        # worth confirming against callers.
        return frozenset(LinkableElementProperty) - {LinkableElementProperty.ENTITY}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Sequence

from dbt_semantic_interfaces.references import SemanticModelReference


class SemanticModelDerivation(ABC):
    """Interface for objects whose definition can be traced back to semantic models."""

    @property
    @abstractmethod
    def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
        """References to the semantic models that this object was derived from.

        Implementations should return an ordered sequence with no duplicate entries.
        """
        raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
from __future__ import annotations

import logging
from abc import ABC
from dataclasses import dataclass
from enum import Enum
from typing import FrozenSet, Optional, Tuple
from typing import FrozenSet, Optional, Sequence, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.protocols.dimension import DimensionType
from dbt_semantic_interfaces.references import DimensionReference, MetricReference, SemanticModelReference
from dbt_semantic_interfaces.references import (
DimensionReference,
EntityReference,
MetricReference,
SemanticModelReference,
)
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from typing_extensions import override

from metricflow_semantics.specs.spec_classes import EntityReference
from metricflow_semantics.model.linkable_element_property import LinkableElementProperty
from metricflow_semantics.model.semantic_model_derivation import SemanticModelDerivation
from metricflow_semantics.specs.spec_classes import (
DimensionSpec,
EntitySpec,
GroupByMetricSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
)

logger = logging.getLogger(__name__)


class LinkableElementType(Enum):
Expand Down Expand Up @@ -40,45 +58,6 @@ def is_dimension_type(self) -> bool:
return assert_values_exhausted(element_type)


class LinkableElementProperty(Enum):
    """The properties associated with a valid linkable element.

    Local means an element that is defined within the same semantic model as the measure. This definition is used
    throughout the related classes.
    """

    # A local element as per above definition.
    LOCAL = "local"
    # A local dimension that is prefixed with a local primary entity.
    LOCAL_LINKED = "local_linked"
    # An element that was joined to the measure semantic model by an entity.
    JOINED = "joined"
    # An element that was joined to the measure semantic model by joining multiple semantic models.
    MULTI_HOP = "multi_hop"
    # A time dimension that is a version of a time dimension in a semantic model, but at a different granularity.
    DERIVED_TIME_GRANULARITY = "derived_time_granularity"
    # Refers to an entity, not a dimension.
    ENTITY = "entity"
    # See metric_time in DataSet
    METRIC_TIME = "metric_time"
    # Refers to a metric, not a dimension.
    METRIC = "metric"

    @staticmethod
    def all_properties() -> FrozenSet[LinkableElementProperty]:  # noqa: D102
        # NOTE(review): ENTITY is not included in this set — confirm the omission is intentional.
        return frozenset(
            {
                LinkableElementProperty.LOCAL,
                LinkableElementProperty.LOCAL_LINKED,
                LinkableElementProperty.JOINED,
                LinkableElementProperty.MULTI_HOP,
                LinkableElementProperty.DERIVED_TIME_GRANULARITY,
                LinkableElementProperty.METRIC_TIME,
                LinkableElementProperty.METRIC,
            }
        )


@dataclass(frozen=True)
class ElementPathKey:
"""A key that can uniquely identify an element and the joins used to realize the element."""
Expand All @@ -101,6 +80,37 @@ def __post_init__(self) -> None:
else:
assert_values_exhausted(element_type)

@property
def spec(self) -> LinkableInstanceSpec:
"""The corresponding spec object for this path key."""
if self.element_type is LinkableElementType.DIMENSION:
return DimensionSpec(
element_name=self.element_name,
entity_links=self.entity_links,
)
elif self.element_type is LinkableElementType.TIME_DIMENSION:
assert (
self.time_granularity is not None
), f"{self.time_granularity=} should not be None as per check in dataclass validation"
return TimeDimensionSpec(
element_name=self.element_name,
entity_links=self.entity_links,
time_granularity=self.time_granularity,
date_part=self.date_part,
)
elif self.element_type is LinkableElementType.ENTITY:
return EntitySpec(
element_name=self.element_name,
entity_links=self.entity_links,
)
elif self.element_type is LinkableElementType.METRIC:
return GroupByMetricSpec(
element_name=self.element_name,
entity_links=self.entity_links,
)
else:
assert_values_exhausted(self.element_type)


@dataclass(frozen=True)
class SemanticModelJoinPathElement:
Expand All @@ -110,8 +120,14 @@ class SemanticModelJoinPathElement:
join_on_entity: EntityReference


class LinkableElement(SemanticModelDerivation, ABC):
    """Base class for an entity / dimension that may have been reached via entity joins."""


@dataclass(frozen=True)
class LinkableDimension:
class LinkableDimension(LinkableElement):
"""Describes how a dimension can be realized by joining based on entity links."""

# The semantic model where this dimension was defined.
Expand Down Expand Up @@ -143,9 +159,20 @@ def path_key(self) -> ElementPathKey: # noqa: D102
def reference(self) -> DimensionReference: # noqa: D102
return DimensionReference(element_name=self.element_name)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
semantic_model_references = set()
if self.semantic_model_origin:
semantic_model_references.add(self.semantic_model_origin)
for join_path_item in self.join_path:
semantic_model_references.add(join_path_item.semantic_model_reference)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)


@dataclass(frozen=True)
class LinkableEntity:
class LinkableEntity(LinkableElement):
"""Describes how an entity can be realized by joining based on entity links."""

# The semantic model where this entity was defined.
Expand All @@ -165,9 +192,18 @@ def path_key(self) -> ElementPathKey: # noqa: D102
def reference(self) -> EntityReference: # noqa: D102
return EntityReference(element_name=self.element_name)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
semantic_model_references = {self.semantic_model_origin}
for join_path_item in self.join_path:
semantic_model_references.add(join_path_item.semantic_model_reference)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)


@dataclass(frozen=True)
class LinkableMetric:
class LinkableMetric(LinkableElement):
"""Describes how a metric can be realized by joining based on entity links."""

element_name: str
Expand All @@ -187,6 +223,15 @@ def path_key(self) -> ElementPathKey: # noqa: D102
def reference(self) -> MetricReference: # noqa: D102
return MetricReference(element_name=self.element_name)

@property
@override
def derived_from_semantic_models(self) -> Sequence[SemanticModelReference]:
semantic_model_references = {self.join_by_semantic_model}
for join_path_item in self.join_path:
semantic_model_references.add(join_path_item.semantic_model_reference)

return sorted(semantic_model_references, key=lambda reference: reference.semantic_model_name)


@dataclass(frozen=True)
class SemanticModelJoinPath:
Expand Down
Loading
Loading