Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Resolver for Saved-Query Dependencies #1152

Merged
merged 14 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations

from typing import Dict, Iterable, Tuple, TypeVar

IterableT = TypeVar("IterableT")


def ordered_dedupe(*iterables: Iterable[IterableT]) -> Tuple[IterableT, ...]:
    """Return the items from all iterables, de-duplicated, in first-seen order.

    A dict is used as the accumulator because dicts preserve insertion order,
    so the first occurrence of each item determines its position in the result.
    """
    seen: Dict[IterableT, None] = {}
    for source in iterables:
        seen.update((element, None) for element in source)
    return tuple(seen)
Comment on lines +8 to +15
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a less robust custom implementation of the (admittedly poorly named) unique_everseen function in more-itertools.

Since dbt-semantic-interfaces depends on more-itertools we should add the dependency with the same version range and use it here as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I just noticed this takes *iterables, so it's a less robust implementation of unique_everseen(itertools.chain(*iterables)). The point still stands, we should use the libraries, especially where iterator collection helpers are concerned.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, we can make that change, but let me check about the "minimize dependencies" request and check how that method works with types.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I already looked at the dependencies, that's not an issue. The typing is another matter, more-itertools is an old library and these methods date back a long way.

Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
ColumnAssociationResolver,
SingleColumnCorrelationKey,
)
from metricflow_semantics.specs.group_by_metric_spec import GroupByMetricSpec
from metricflow_semantics.specs.spec_classes import (
DimensionSpec,
EntitySpec,
GroupByMetricSpec,
InstanceSpec,
InstanceSpecVisitor,
MeasureSpec,
Expand Down

This file was deleted.

161 changes: 161 additions & 0 deletions metricflow-semantics/metricflow_semantics/specs/linkable_spec_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
from __future__ import annotations

import itertools
import typing
from dataclasses import dataclass
from typing import Dict, List, Sequence, Tuple

from dbt_semantic_interfaces.dataclass_serialization import SerializableDataclass
from dbt_semantic_interfaces.references import MeasureReference, MetricReference
from typing_extensions import override

from metricflow_semantics.collection_helpers.merger import Mergeable
from metricflow_semantics.specs.spec_classes import (
DimensionSpec,
EntitySpec,
GroupByMetricSpec,
InstanceSpec,
LinkableInstanceSpec,
TimeDimensionSpec,
)
from metricflow_semantics.specs.spec_set import InstanceSpecSet

if typing.TYPE_CHECKING:
from metricflow_semantics.model.semantics.metric_lookup import MetricLookup
from metricflow_semantics.model.semantics.semantic_model_lookup import SemanticModelLookup


@dataclass(frozen=True)
class LinkableSpecSet(Mergeable, SerializableDataclass):
    """Groups linkable specs: dimensions, time dimensions, entities, and group-by metrics."""

    dimension_specs: Tuple[DimensionSpec, ...] = ()
    time_dimension_specs: Tuple[TimeDimensionSpec, ...] = ()
    entity_specs: Tuple[EntitySpec, ...] = ()
    group_by_metric_specs: Tuple[GroupByMetricSpec, ...] = ()

    @property
    def contains_metric_time(self) -> bool:
        """Returns true if this set contains a spec referring to metric time at any grain."""
        return len(self.metric_time_specs) > 0

    def included_agg_time_dimension_specs_for_metric(
        self, metric_reference: MetricReference, metric_lookup: MetricLookup
    ) -> List[TimeDimensionSpec]:
        """Get the time dims included that are valid agg time dimensions for the specified metric.

        Returns the queried time-dimension specs that intersect the metric's valid aggregation
        time dimensions, plus every queried metric-time spec.
        """
        queried_metric_time_specs = list(self.metric_time_specs)

        valid_agg_time_dimensions = metric_lookup.get_valid_agg_time_dimensions_for_metric(metric_reference)
        # Set intersection loses order; order of this result is not guaranteed.
        queried_agg_time_dimension_specs = (
            list(set(self.time_dimension_specs).intersection(set(valid_agg_time_dimensions)))
            + queried_metric_time_specs
        )

        return queried_agg_time_dimension_specs

    def included_agg_time_dimension_specs_for_measure(
        self, measure_reference: MeasureReference, semantic_model_lookup: SemanticModelLookup
    ) -> List[TimeDimensionSpec]:
        """Get the time dims included that are valid agg time dimensions for the specified measure.

        Mirrors included_agg_time_dimension_specs_for_metric, but resolves valid aggregation
        time dimensions through the semantic-model lookup for a measure.
        """
        queried_metric_time_specs = list(self.metric_time_specs)

        valid_agg_time_dimensions = semantic_model_lookup.get_agg_time_dimension_specs_for_measure(measure_reference)
        queried_agg_time_dimension_specs = (
            list(set(self.time_dimension_specs).intersection(set(valid_agg_time_dimensions)))
            + queried_metric_time_specs
        )

        return queried_agg_time_dimension_specs

    @property
    def metric_time_specs(self) -> Sequence[TimeDimensionSpec]:
        """Returns any specs referring to metric time at any grain."""
        return tuple(
            time_dimension_spec
            for time_dimension_spec in self.time_dimension_specs
            if time_dimension_spec.is_metric_time
        )

    @property
    def as_tuple(self) -> Tuple[LinkableInstanceSpec, ...]:
        """All contained specs as one flat tuple, in field-declaration order."""
        return tuple(
            itertools.chain(
                self.dimension_specs, self.time_dimension_specs, self.entity_specs, self.group_by_metric_specs
            )
        )

    @override
    def merge(self, other: LinkableSpecSet) -> LinkableSpecSet:
        """Concatenate the specs of this set and `other` (duplicates are kept)."""
        return LinkableSpecSet(
            dimension_specs=self.dimension_specs + other.dimension_specs,
            time_dimension_specs=self.time_dimension_specs + other.time_dimension_specs,
            entity_specs=self.entity_specs + other.entity_specs,
            group_by_metric_specs=self.group_by_metric_specs + other.group_by_metric_specs,
        )

    @override
    @classmethod
    def empty_instance(cls) -> LinkableSpecSet:
        """An empty set — the identity element for merge()."""
        return LinkableSpecSet()

    def dedupe(self) -> LinkableSpecSet:
        """Remove duplicate specs within each field while preserving insertion order."""
        # dict.fromkeys de-duplicates and preserves first-seen order.
        return LinkableSpecSet(
            dimension_specs=tuple(dict.fromkeys(self.dimension_specs)),
            time_dimension_specs=tuple(dict.fromkeys(self.time_dimension_specs)),
            entity_specs=tuple(dict.fromkeys(self.entity_specs)),
            group_by_metric_specs=tuple(dict.fromkeys(self.group_by_metric_specs)),
        )

    def is_subset_of(self, other_set: LinkableSpecSet) -> bool:
        """True if every spec in this set also appears in `other_set` (order ignored)."""
        return set(self.as_tuple).issubset(set(other_set.as_tuple))

    @property
    def as_spec_set(self) -> InstanceSpecSet:
        """Repackage these specs as an InstanceSpecSet (other spec-type fields left empty)."""
        return InstanceSpecSet(
            dimension_specs=self.dimension_specs,
            time_dimension_specs=self.time_dimension_specs,
            entity_specs=self.entity_specs,
            group_by_metric_specs=self.group_by_metric_specs,
        )

    def difference(self, other: LinkableSpecSet) -> LinkableSpecSet:
        """Specs in this set but not in `other`. Set difference loses order and duplicates."""
        return LinkableSpecSet(
            dimension_specs=tuple(set(self.dimension_specs) - set(other.dimension_specs)),
            time_dimension_specs=tuple(set(self.time_dimension_specs) - set(other.time_dimension_specs)),
            entity_specs=tuple(set(self.entity_specs) - set(other.entity_specs)),
            group_by_metric_specs=tuple(set(self.group_by_metric_specs) - set(other.group_by_metric_specs)),
        )

    def __len__(self) -> int:
        """Total number of contained specs.

        Includes group_by_metric_specs so that len(self) == len(self.as_tuple), consistent
        with merge(), dedupe(), and difference() (previously omitted, which under-counted).
        """
        return (
            len(self.dimension_specs)
            + len(self.time_dimension_specs)
            + len(self.entity_specs)
            + len(self.group_by_metric_specs)
        )

    @staticmethod
    def create_from_spec_set(spec_set: InstanceSpecSet) -> LinkableSpecSet:
        """Build a LinkableSpecSet from the linkable fields of an InstanceSpecSet."""
        return LinkableSpecSet(
            dimension_specs=spec_set.dimension_specs,
            time_dimension_specs=spec_set.time_dimension_specs,
            entity_specs=spec_set.entity_specs,
            group_by_metric_specs=spec_set.group_by_metric_specs,
        )

    @staticmethod
    def create_from_specs(specs: Sequence[InstanceSpec]) -> LinkableSpecSet:
        """Build a LinkableSpecSet by first grouping arbitrary specs into an InstanceSpecSet."""
        return LinkableSpecSet.create_from_spec_set(InstanceSpecSet.create_from_specs(specs))
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Tuple

from dbt_semantic_interfaces.dataclass_serialization import SerializableDataclass

from metricflow_semantics.specs.spec_classes import DimensionSpec, TimeDimensionSpec


@dataclass(frozen=True)
class PartitionSpecSet(SerializableDataclass):
    """Grouping of the linkable specs.

    Holds dimension and time-dimension specs together as one serializable, immutable value.
    NOTE(review): the class name suggests these specs are partition keys — confirm against
    callers; only the grouping itself is visible here.
    """

    # Dimension specs in this grouping; defaults to empty.
    dimension_specs: Tuple[DimensionSpec, ...] = ()
    # Time-dimension specs in this grouping; defaults to empty.
    time_dimension_specs: Tuple[TimeDimensionSpec, ...] = ()
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
InstanceSpecSet,
LinkableInstanceSpec,
LinkableSpecSet,
TimeDimensionSpec,
TimeDimensionSpecComparisonKey,
TimeDimensionSpecField,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class BaseTimeGrainPattern(SpecPattern):
Expand Down Expand Up @@ -66,7 +65,7 @@ def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[InstanceSpe

return other_specs + tuple(BaseTimeGrainPattern(only_apply_for_metric_time=False).match(metric_time_specs))

spec_set = LinkableSpecSet.from_specs(InstanceSpecSet.from_specs(candidate_specs).linkable_specs)
spec_set = group_specs_by_type(candidate_specs)

spec_key_to_grains: Dict[TimeDimensionSpecComparisonKey, Set[TimeGranularity]] = defaultdict(set)
spec_key_to_specs: Dict[TimeDimensionSpecComparisonKey, List[TimeDimensionSpec]] = defaultdict(list)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from typing_extensions import override

from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import InstanceSpec, InstanceSpecSet, LinkableInstanceSpec
from metricflow_semantics.specs.spec_classes import InstanceSpec, LinkableInstanceSpec
from metricflow_semantics.specs.spec_set import group_specs_by_type

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -109,7 +110,7 @@ def _match_entity_links(self, candidate_specs: Sequence[LinkableInstanceSpec]) -

@override
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[LinkableInstanceSpec]:
filtered_candidate_specs = InstanceSpecSet.from_specs(candidate_specs).linkable_specs
filtered_candidate_specs = group_specs_by_type(candidate_specs).linkable_specs
# Checks that EntityLinkPatternParameterSetField is valid wrt to the parameter set.

# Entity links could be a partial match, so it's handled separately.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
InstanceSpecSet,
MetricSpec,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


@dataclass(frozen=True)
Expand All @@ -22,7 +22,7 @@ class MetricSpecPattern(SpecPattern):

@override
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[MetricSpec]:
spec_set = InstanceSpecSet.from_specs(candidate_specs)
spec_set = group_specs_by_type(candidate_specs)
return tuple(
metric_name for metric_name in spec_set.metric_specs if metric_name.reference == self.metric_reference
)
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
InstanceSpecSet,
LinkableSpecSet,
TimeDimensionSpec,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class MetricTimePattern(SpecPattern):
Expand All @@ -23,7 +22,7 @@ class MetricTimePattern(SpecPattern):

@override
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[TimeDimensionSpec]:
spec_set = LinkableSpecSet.from_specs(InstanceSpecSet.from_specs(candidate_specs).linkable_specs)
spec_set = group_specs_by_type(candidate_specs)
return tuple(
time_dimension_spec
for time_dimension_spec in spec_set.time_dimension_specs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from metricflow_semantics.specs.patterns.spec_pattern import SpecPattern
from metricflow_semantics.specs.spec_classes import (
InstanceSpec,
InstanceSpecSet,
LinkableInstanceSpec,
)
from metricflow_semantics.specs.spec_set import group_specs_by_type


class NoneDatePartPattern(SpecPattern):
Expand All @@ -21,7 +21,7 @@ class NoneDatePartPattern(SpecPattern):
@override
def match(self, candidate_specs: Sequence[InstanceSpec]) -> Sequence[LinkableInstanceSpec]:
specs_to_return: List[LinkableInstanceSpec] = []
spec_set = InstanceSpecSet.from_specs(candidate_specs)
spec_set = group_specs_by_type(candidate_specs)
for time_dimension_spec in spec_set.time_dimension_specs:
if time_dimension_spec.date_part is None:
specs_to_return.append(time_dimension_spec)
Expand Down
Loading