From 198b12431070ba021886623e784cea104e815bdd Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 23 May 2024 18:19:38 -0700 Subject: [PATCH] Cache linkable elements (#1229) Recently we've seen an increase in latency for compile time on queries. The theory is that this might be caused by the addition of `LinkableMetrics` to group by options, which may have dramatically increased the number of group by options available. Looping through them could add a lot of slowness to the query. I added some logs to see how long those loops were taking, and looking at data from the past 24 hours, the call to `linkable_elements_for_measure` took about .1 seconds on each call for long-running queries. This could be called quite a few times for the same measure within a given query, resulting in a lot of duplicate work & unnecessary latency. To fix that, I decided to cache that method. I also cached a couple of similar methods along the way. I tested this primarily by running queries in pytest and showing INFO logs. There was a major reduction in the logs for these functions after adding the cache (especially in scenarios where a given measure is used several times in one query). The other changes in this PR are making params hashable, which is required for the cache decorator. I did not add an expiration for the cache - since these classes are scoped to a given semantic manifest, the valid linkable elements for a given query should never change. --- .../Under the Hood-20240523-155148.yaml | 7 +++++++ .../model/semantics/linkable_element_set.py | 10 +++++++++- .../model/semantics/metric_lookup.py | 20 +++++++++++-------- .../candidate_push_down/push_down_visitor.py | 9 +++------ .../model/test_semantic_model_container.py | 6 ++---- metricflow/engine/metricflow_engine.py | 4 ++-- 6 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20240523-155148.yaml diff --git a/.changes/unreleased/Under the Hood-20240523-155148.yaml b/.changes/unreleased/Under the Hood-20240523-155148.yaml new file mode 100644 index 0000000000..18366aece5 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240523-155148.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Cache functions related to parsing group by options in order to improve query + performance. +time: 2024-05-23T15:51:48.789152-07:00 +custom: + Author: courtneyholcomb + Issue: "1229" diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py index 9e077279d6..8a59c9a6a4 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py @@ -1,5 +1,7 @@ from __future__ import annotations +import logging +import time from collections import defaultdict from dataclasses import dataclass, field from typing import Dict, FrozenSet, List, Sequence, Set, Tuple @@ -28,6 +30,8 @@ TimeDimensionSpec, ) +logger = logging.getLogger(__name__) + @dataclass(frozen=True) class LinkableElementSet(SemanticModelDerivation): @@ -379,6 +383,8 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka Returns a new set consisting of the elements in the `LinkableElementSet` that have a corresponding spec that match all the given spec patterns. """ + start_time = time.time() + # Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`. matching_specs: Sequence[InstanceSpec] = self.specs @@ -402,8 +408,10 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka if LinkableElementSet._path_key_to_spec(path_key) in specs_to_include: path_key_to_linkable_metrics[path_key] = linkable_metrics - return LinkableElementSet( + filtered_elements = LinkableElementSet( path_key_to_linkable_dimensions=path_key_to_linkable_dimensions, path_key_to_linkable_entities=path_key_to_linkable_entities, path_key_to_linkable_metrics=path_key_to_linkable_metrics, ) + logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s") + return filtered_elements diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index 35ce7c45be..ca2b01eadb 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools import logging import time from typing import Dict, FrozenSet, Optional, Sequence, Set @@ -44,15 +45,16 @@ def __init__(self, semantic_manifest: SemanticManifest, semantic_model_lookup: S max_entity_links=MAX_JOIN_HOPS, ) + @functools.lru_cache def linkable_elements_for_measure( self, measure_reference: MeasureReference, - with_any_of: Optional[Set[LinkableElementProperty]] = None, - without_any_of: Optional[Set[LinkableElementProperty]] = None, + with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, + without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> LinkableElementSet: """Return the set of linkable elements reachable from a given measure.""" - frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of) - frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of) + frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of + frozen_without_any_of = frozenset() if without_any_of is None else without_any_of start_time = time.time() linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure( @@ -66,20 +68,22 @@ def linkable_elements_for_measure( return linkable_element_set + @functools.lru_cache def linkable_elements_for_no_metrics_query( self, - with_any_of: Optional[Set[LinkableElementProperty]] = None, - without_any_of: Optional[Set[LinkableElementProperty]] = None, + with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, + without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> LinkableElementSet: """Return the reachable linkable elements for a dimension values query with no metrics.""" - frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of) - frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of) + frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of + frozen_without_any_of = frozenset() if without_any_of is None else without_any_of return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query( with_any_of=frozen_with_any_of, without_any_of=frozen_without_any_of, ) + @functools.lru_cache def linkable_elements_for_metrics( self, metric_references: Sequence[MetricReference], diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py index 53da4b6706..2256db8a60 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -1,10 +1,9 @@ from __future__ import annotations import logging -import time from contextlib import contextmanager from dataclasses import dataclass -from typing import Dict, Iterator, List, Optional, Sequence, Set, Tuple +from typing import Dict, FrozenSet, Iterator, List, Optional, Sequence, Tuple from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.type_enums import MetricType @@ -130,8 +129,8 @@ def __init__( manifest_lookup: SemanticManifestLookup, suggestion_generator: Optional[QueryItemSuggestionGenerator], source_spec_patterns: Sequence[SpecPattern] = (), - with_any_property: Optional[Set[LinkableElementProperty]] = None, - without_any_property: Optional[Set[LinkableElementProperty]] = None, + with_any_property: Optional[FrozenSet[LinkableElementProperty]] = None, + without_any_property: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> None: """Initializer. @@ -190,11 +189,9 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu else: assert_values_exhausted(metric.type) - start_time = time.time() matching_items = items_available_for_measure.filter_by_spec_patterns( patterns_to_apply + self._source_spec_patterns ) - logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s") if logger.isEnabledFor(logging.DEBUG): logger.debug( diff --git a/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py b/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py index 4e4ad8603e..bf872becaf 100644 --- a/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py +++ b/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py @@ -92,7 +92,7 @@ def test_local_linked_elements_for_metric( # noqa: D103 request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, metric_lookup: MetricLookup ) -> None: linkable_elements = metric_lookup.linkable_elements_for_metrics( - [MetricReference(element_name="listings")], + (MetricReference(element_name="listings"),), with_any_property=frozenset({LinkableElementProperty.LOCAL_LINKED}), without_any_property=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY}), ) @@ -169,9 +169,7 @@ def test_linkable_elements_for_no_metrics_query( ) -> None: """Tests extracting linkable elements for a dimension values query with no metrics.""" linkable_elements = metric_lookup.linkable_elements_for_no_metrics_query( - without_any_of={ - LinkableElementProperty.DERIVED_TIME_GRANULARITY, - } + without_any_of=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY}) ) sorted_specs = sorted(linkable_elements.specs, key=lambda x: x.qualified_name) assert_object_snapshot_equal( diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index f3d70d860c..b4f1a75748 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -562,7 +562,7 @@ def simple_dimensions_for_metrics( # noqa: D102 ) -> List[Dimension]: path_key_to_linkable_dimensions = ( self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics( - metric_references=[MetricReference(element_name=mname) for mname in metric_names], + metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names), without_any_property=frozenset(without_any_property), ) ).path_key_to_linkable_dimensions @@ -646,7 +646,7 @@ def list_dimensions(self) -> List[Dimension]: # noqa: D102 def entities_for_metrics(self, metric_names: List[str]) -> List[Entity]: # noqa: D102 path_key_to_linkable_entities = ( self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics( - metric_references=[MetricReference(element_name=mname) for mname in metric_names], + metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names), with_any_property=frozenset( { LinkableElementProperty.ENTITY,