Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache linkable elements #1229

Merged
merged 4 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20240523-155148.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: Cache functions related to parsing group by options in order to improve query
performance.
time: 2024-05-23T15:51:48.789152-07:00
custom:
Author: courtneyholcomb
Issue: "1229"
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import logging
import time
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict, FrozenSet, List, Sequence, Set, Tuple
Expand Down Expand Up @@ -28,6 +30,8 @@
TimeDimensionSpec,
)

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class LinkableElementSet(SemanticModelDerivation):
Expand Down Expand Up @@ -379,6 +383,8 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka
Returns a new set consisting of the elements in the `LinkableElementSet` that have a corresponding spec that
match all the given spec patterns.
"""
start_time = time.time()

# Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`.
matching_specs: Sequence[InstanceSpec] = self.specs

Expand All @@ -402,8 +408,10 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka
if LinkableElementSet._path_key_to_spec(path_key) in specs_to_include:
path_key_to_linkable_metrics[path_key] = linkable_metrics

return LinkableElementSet(
filtered_elements = LinkableElementSet(
path_key_to_linkable_dimensions=path_key_to_linkable_dimensions,
path_key_to_linkable_entities=path_key_to_linkable_entities,
path_key_to_linkable_metrics=path_key_to_linkable_metrics,
)
logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would love to cache this function too, but that change would be a bit more involved since base class (LinkableElementSet) relies heavily on dictionaries, which are not hashable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just store Sequence[Tuple[ElementPathKey, Sequence[LinkableDimension]]] or whatever instead of a dict? We go to some lengths to avoid mutation of these things anyway, might as well make it official.

We shouldn't do it in this PR, anyway, that sounds like a fairly involved change given the need to update callsites and all of those merge functions and whatnot.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that could work!

return filtered_elements
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import functools
import logging
import time
from typing import Dict, FrozenSet, Optional, Sequence, Set
Expand Down Expand Up @@ -44,15 +45,16 @@ def __init__(self, semantic_manifest: SemanticManifest, semantic_model_lookup: S
max_entity_links=MAX_JOIN_HOPS,
)

@functools.lru_cache
def linkable_elements_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: Optional[Set[LinkableElementProperty]] = None,
without_any_of: Optional[Set[LinkableElementProperty]] = None,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
Comment on lines +52 to +53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kind of wondered why these weren't frozenset to begin with.

) -> LinkableElementSet:
"""Return the set of linkable elements reachable from a given measure."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of)
frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of)
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

start_time = time.time()
linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure(
Expand All @@ -66,20 +68,22 @@ def linkable_elements_for_measure(

return linkable_element_set

@functools.lru_cache
def linkable_elements_for_no_metrics_query(
self,
with_any_of: Optional[Set[LinkableElementProperty]] = None,
without_any_of: Optional[Set[LinkableElementProperty]] = None,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
) -> LinkableElementSet:
"""Return the reachable linkable elements for a dimension values query with no metrics."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of)
frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of)
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query(
with_any_of=frozen_with_any_of,
without_any_of=frozen_without_any_of,
)

@functools.lru_cache
def linkable_elements_for_metrics(
self,
metric_references: Sequence[MetricReference],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oof Sequence isn't counted as immutable. Should we update this to Tuple or does the typechecker catch it if we pass in a non-hashable type?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type checker catches it! It fails if the type isn't hashable.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from __future__ import annotations

import logging
import time
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Dict, Iterator, List, Optional, Sequence, Set, Tuple
from typing import Dict, FrozenSet, Iterator, List, Optional, Sequence, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.type_enums import MetricType
Expand Down Expand Up @@ -130,8 +129,8 @@ def __init__(
manifest_lookup: SemanticManifestLookup,
suggestion_generator: Optional[QueryItemSuggestionGenerator],
source_spec_patterns: Sequence[SpecPattern] = (),
with_any_property: Optional[Set[LinkableElementProperty]] = None,
without_any_property: Optional[Set[LinkableElementProperty]] = None,
with_any_property: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_property: Optional[FrozenSet[LinkableElementProperty]] = None,
) -> None:
"""Initializer.

Expand Down Expand Up @@ -190,11 +189,9 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu
else:
assert_values_exhausted(metric.type)

start_time = time.time()
matching_items = items_available_for_measure.filter_by_spec_patterns(
patterns_to_apply + self._source_spec_patterns
)
logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s")

if logger.isEnabledFor(logging.DEBUG):
logger.debug(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_local_linked_elements_for_metric( # noqa: D103
request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, metric_lookup: MetricLookup
) -> None:
linkable_elements = metric_lookup.linkable_elements_for_metrics(
[MetricReference(element_name="listings")],
(MetricReference(element_name="listings"),),
with_any_property=frozenset({LinkableElementProperty.LOCAL_LINKED}),
without_any_property=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY}),
)
Expand Down Expand Up @@ -169,9 +169,7 @@ def test_linkable_elements_for_no_metrics_query(
) -> None:
"""Tests extracting linkable elements for a dimension values query with no metrics."""
linkable_elements = metric_lookup.linkable_elements_for_no_metrics_query(
without_any_of={
LinkableElementProperty.DERIVED_TIME_GRANULARITY,
}
without_any_of=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY})
)
sorted_specs = sorted(linkable_elements.specs, key=lambda x: x.qualified_name)
assert_object_snapshot_equal(
Expand Down
4 changes: 2 additions & 2 deletions metricflow/engine/metricflow_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ def simple_dimensions_for_metrics( # noqa: D102
) -> List[Dimension]:
path_key_to_linkable_dimensions = (
self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics(
metric_references=[MetricReference(element_name=mname) for mname in metric_names],
metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names),
without_any_property=frozenset(without_any_property),
)
).path_key_to_linkable_dimensions
Expand Down Expand Up @@ -646,7 +646,7 @@ def list_dimensions(self) -> List[Dimension]: # noqa: D102
def entities_for_metrics(self, metric_names: List[str]) -> List[Entity]: # noqa: D102
path_key_to_linkable_entities = (
self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics(
metric_references=[MetricReference(element_name=mname) for mname in metric_names],
metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names),
with_any_property=frozenset(
{
LinkableElementProperty.ENTITY,
Expand Down
Loading