Skip to content

Commit

Permalink
Cache linkable elements (#1229)
Browse files Browse the repository at this point in the history
Recently we've seen an increase in latency for compile time on queries.
The theory is that this might be caused by the addition of
`LinkableMetrics` to group by options, which may have dramatically
increased the number of group by options available. Looping through them
could add a lot of slowness to the query.

I added some logs to see how long those loops were taking, and looking
at data from the past 24 hours, the call to
`linkable_elements_for_measure` took about .1 seconds on each call for
long-running queries. This could be called quite a few times for the
same measure within a given query, resulting in a lot of duplicate work
& unnecessary latency. To fix that, I decided to cache that method. I
also cached a couple of similar methods along the way.

I tested this primarily by running queries in pytest and showing INFO
logs. There was a major reduction in the logs for these functions after
adding the cache (especially in scenarios where a given measure is used
several times in one query).

The other changes in this PR are making params hashable, which is
required for the cache decorator.
I did not add an expiration for the cache - since these classes are
scoped to a given semantic manifest, the valid linkable elements for a
given query should never change.
  • Loading branch information
courtneyholcomb authored May 24, 2024
1 parent ff012cc commit 198b124
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 21 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20240523-155148.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: Cache functions related to parsing group by options in order to improve query
performance.
time: 2024-05-23T15:51:48.789152-07:00
custom:
Author: courtneyholcomb
Issue: "1229"
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import logging
import time
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Dict, FrozenSet, List, Sequence, Set, Tuple
Expand Down Expand Up @@ -28,6 +30,8 @@
TimeDimensionSpec,
)

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class LinkableElementSet(SemanticModelDerivation):
Expand Down Expand Up @@ -379,6 +383,8 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka
Returns a new set consisting of the elements in the `LinkableElementSet` that have a corresponding spec that
match all the given spec patterns.
"""
start_time = time.time()

# Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`.
matching_specs: Sequence[InstanceSpec] = self.specs

Expand All @@ -402,8 +408,10 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka
if LinkableElementSet._path_key_to_spec(path_key) in specs_to_include:
path_key_to_linkable_metrics[path_key] = linkable_metrics

return LinkableElementSet(
filtered_elements = LinkableElementSet(
path_key_to_linkable_dimensions=path_key_to_linkable_dimensions,
path_key_to_linkable_entities=path_key_to_linkable_entities,
path_key_to_linkable_metrics=path_key_to_linkable_metrics,
)
logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s")
return filtered_elements
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import functools
import logging
import time
from typing import Dict, FrozenSet, Optional, Sequence, Set
Expand Down Expand Up @@ -44,15 +45,16 @@ def __init__(self, semantic_manifest: SemanticManifest, semantic_model_lookup: S
max_entity_links=MAX_JOIN_HOPS,
)

@functools.lru_cache
def linkable_elements_for_measure(
self,
measure_reference: MeasureReference,
with_any_of: Optional[Set[LinkableElementProperty]] = None,
without_any_of: Optional[Set[LinkableElementProperty]] = None,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
) -> LinkableElementSet:
"""Return the set of linkable elements reachable from a given measure."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of)
frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of)
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

start_time = time.time()
linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure(
Expand All @@ -66,20 +68,22 @@ def linkable_elements_for_measure(

return linkable_element_set

@functools.lru_cache
def linkable_elements_for_no_metrics_query(
self,
with_any_of: Optional[Set[LinkableElementProperty]] = None,
without_any_of: Optional[Set[LinkableElementProperty]] = None,
with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None,
) -> LinkableElementSet:
"""Return the reachable linkable elements for a dimension values query with no metrics."""
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of)
frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of)
frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of
frozen_without_any_of = frozenset() if without_any_of is None else without_any_of

return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query(
with_any_of=frozen_with_any_of,
without_any_of=frozen_without_any_of,
)

@functools.lru_cache
def linkable_elements_for_metrics(
self,
metric_references: Sequence[MetricReference],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from __future__ import annotations

import logging
import time
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Dict, Iterator, List, Optional, Sequence, Set, Tuple
from typing import Dict, FrozenSet, Iterator, List, Optional, Sequence, Tuple

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.type_enums import MetricType
Expand Down Expand Up @@ -130,8 +129,8 @@ def __init__(
manifest_lookup: SemanticManifestLookup,
suggestion_generator: Optional[QueryItemSuggestionGenerator],
source_spec_patterns: Sequence[SpecPattern] = (),
with_any_property: Optional[Set[LinkableElementProperty]] = None,
without_any_property: Optional[Set[LinkableElementProperty]] = None,
with_any_property: Optional[FrozenSet[LinkableElementProperty]] = None,
without_any_property: Optional[FrozenSet[LinkableElementProperty]] = None,
) -> None:
"""Initializer.
Expand Down Expand Up @@ -190,11 +189,9 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu
else:
assert_values_exhausted(metric.type)

start_time = time.time()
matching_items = items_available_for_measure.filter_by_spec_patterns(
patterns_to_apply + self._source_spec_patterns
)
logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s")

if logger.isEnabledFor(logging.DEBUG):
logger.debug(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def test_local_linked_elements_for_metric( # noqa: D103
request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, metric_lookup: MetricLookup
) -> None:
linkable_elements = metric_lookup.linkable_elements_for_metrics(
[MetricReference(element_name="listings")],
(MetricReference(element_name="listings"),),
with_any_property=frozenset({LinkableElementProperty.LOCAL_LINKED}),
without_any_property=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY}),
)
Expand Down Expand Up @@ -169,9 +169,7 @@ def test_linkable_elements_for_no_metrics_query(
) -> None:
"""Tests extracting linkable elements for a dimension values query with no metrics."""
linkable_elements = metric_lookup.linkable_elements_for_no_metrics_query(
without_any_of={
LinkableElementProperty.DERIVED_TIME_GRANULARITY,
}
without_any_of=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY})
)
sorted_specs = sorted(linkable_elements.specs, key=lambda x: x.qualified_name)
assert_object_snapshot_equal(
Expand Down
4 changes: 2 additions & 2 deletions metricflow/engine/metricflow_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ def simple_dimensions_for_metrics( # noqa: D102
) -> List[Dimension]:
path_key_to_linkable_dimensions = (
self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics(
metric_references=[MetricReference(element_name=mname) for mname in metric_names],
metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names),
without_any_property=frozenset(without_any_property),
)
).path_key_to_linkable_dimensions
Expand Down Expand Up @@ -646,7 +646,7 @@ def list_dimensions(self) -> List[Dimension]: # noqa: D102
def entities_for_metrics(self, metric_names: List[str]) -> List[Entity]: # noqa: D102
path_key_to_linkable_entities = (
self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics(
metric_references=[MetricReference(element_name=mname) for mname in metric_names],
metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names),
with_any_property=frozenset(
{
LinkableElementProperty.ENTITY,
Expand Down

0 comments on commit 198b124

Please sign in to comment.