From 6aca983c2f9ae1715994d2088da52369d81a7c65 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 23 May 2024 15:27:36 -0700 Subject: [PATCH 1/4] Cache linkable elements --- .../model/semantics/linkable_element_set.py | 10 +++++++++- .../model/semantics/metric_lookup.py | 12 ++++++++---- .../candidate_push_down/push_down_visitor.py | 9 +++------ .../model/test_semantic_model_container.py | 6 ++---- metricflow/engine/metricflow_engine.py | 4 ++-- 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py index 9e077279d6..8a59c9a6a4 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/linkable_element_set.py @@ -1,5 +1,7 @@ from __future__ import annotations +import logging +import time from collections import defaultdict from dataclasses import dataclass, field from typing import Dict, FrozenSet, List, Sequence, Set, Tuple @@ -28,6 +30,8 @@ TimeDimensionSpec, ) +logger = logging.getLogger(__name__) + @dataclass(frozen=True) class LinkableElementSet(SemanticModelDerivation): @@ -379,6 +383,8 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka Returns a new set consisting of the elements in the `LinkableElementSet` that have a corresponding spec that match all the given spec patterns. """ + start_time = time.time() + # Spec patterns need all specs to match properly e.g. `BaseTimeGrainPattern`. matching_specs: Sequence[InstanceSpec] = self.specs @@ -402,8 +408,10 @@ def filter_by_spec_patterns(self, spec_patterns: Sequence[SpecPattern]) -> Linka if LinkableElementSet._path_key_to_spec(path_key) in specs_to_include: path_key_to_linkable_metrics[path_key] = linkable_metrics - return LinkableElementSet( + filtered_elements = LinkableElementSet( path_key_to_linkable_dimensions=path_key_to_linkable_dimensions, path_key_to_linkable_entities=path_key_to_linkable_entities, path_key_to_linkable_metrics=path_key_to_linkable_metrics, ) + logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s") + return filtered_elements diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index 35ce7c45be..5311529fda 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools import logging import time from typing import Dict, FrozenSet, Optional, Sequence, Set @@ -44,11 +45,12 @@ def __init__(self, semantic_manifest: SemanticManifest, semantic_model_lookup: S max_entity_links=MAX_JOIN_HOPS, ) + @functools.cache def linkable_elements_for_measure( self, measure_reference: MeasureReference, - with_any_of: Optional[Set[LinkableElementProperty]] = None, - without_any_of: Optional[Set[LinkableElementProperty]] = None, + with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, + without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> LinkableElementSet: """Return the set of linkable elements reachable from a given measure.""" frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of) @@ -66,10 +68,11 @@ def linkable_elements_for_measure( return linkable_element_set + @functools.cache def linkable_elements_for_no_metrics_query( self, - with_any_of: Optional[Set[LinkableElementProperty]] = None, - without_any_of: Optional[Set[LinkableElementProperty]] = None, + with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, + without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> LinkableElementSet: """Return the reachable linkable elements for a dimension values query with no metrics.""" frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of) @@ -80,6 +83,7 @@ def linkable_elements_for_no_metrics_query( without_any_of=frozen_without_any_of, ) + @functools.cache def linkable_elements_for_metrics( self, metric_references: Sequence[MetricReference], diff --git a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py index 53da4b6706..2256db8a60 100644 --- a/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py +++ b/metricflow-semantics/metricflow_semantics/query/group_by_item/candidate_push_down/push_down_visitor.py @@ -1,10 +1,9 @@ from __future__ import annotations import logging -import time from contextlib import contextmanager from dataclasses import dataclass -from typing import Dict, Iterator, List, Optional, Sequence, Set, Tuple +from typing import Dict, FrozenSet, Iterator, List, Optional, Sequence, Tuple from dbt_semantic_interfaces.enum_extension import assert_values_exhausted from dbt_semantic_interfaces.type_enums import MetricType @@ -130,8 +129,8 @@ def __init__( manifest_lookup: SemanticManifestLookup, suggestion_generator: Optional[QueryItemSuggestionGenerator], source_spec_patterns: Sequence[SpecPattern] = (), - with_any_property: Optional[Set[LinkableElementProperty]] = None, - without_any_property: Optional[Set[LinkableElementProperty]] = None, + with_any_property: Optional[FrozenSet[LinkableElementProperty]] = None, + without_any_property: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> None: """Initializer. @@ -190,11 +189,9 @@ def visit_measure_node(self, node: MeasureGroupByItemSourceNode) -> PushDownResu else: assert_values_exhausted(metric.type) - start_time = time.time() matching_items = items_available_for_measure.filter_by_spec_patterns( patterns_to_apply + self._source_spec_patterns ) - logger.info(f"Filtering valid linkable elements took: {time.time() - start_time:.2f}s") if logger.isEnabledFor(logging.DEBUG): logger.debug( diff --git a/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py b/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py index 4e4ad8603e..bf872becaf 100644 --- a/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py +++ b/metricflow-semantics/tests_metricflow_semantics/model/test_semantic_model_container.py @@ -92,7 +92,7 @@ def test_local_linked_elements_for_metric( # noqa: D103 request: FixtureRequest, mf_test_configuration: MetricFlowTestConfiguration, metric_lookup: MetricLookup ) -> None: linkable_elements = metric_lookup.linkable_elements_for_metrics( - [MetricReference(element_name="listings")], + (MetricReference(element_name="listings"),), with_any_property=frozenset({LinkableElementProperty.LOCAL_LINKED}), without_any_property=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY}), ) @@ -169,9 +169,7 @@ def test_linkable_elements_for_no_metrics_query( ) -> None: """Tests extracting linkable elements for a dimension values query with no metrics.""" linkable_elements = metric_lookup.linkable_elements_for_no_metrics_query( - without_any_of={ - LinkableElementProperty.DERIVED_TIME_GRANULARITY, - } + without_any_of=frozenset({LinkableElementProperty.DERIVED_TIME_GRANULARITY}) ) sorted_specs = sorted(linkable_elements.specs, key=lambda x: x.qualified_name) assert_object_snapshot_equal( diff --git a/metricflow/engine/metricflow_engine.py b/metricflow/engine/metricflow_engine.py index f3d70d860c..b4f1a75748 100644 --- a/metricflow/engine/metricflow_engine.py +++ b/metricflow/engine/metricflow_engine.py @@ -562,7 +562,7 @@ def simple_dimensions_for_metrics( # noqa: D102 ) -> List[Dimension]: path_key_to_linkable_dimensions = ( self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics( - metric_references=[MetricReference(element_name=mname) for mname in metric_names], + metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names), without_any_property=frozenset(without_any_property), ) ).path_key_to_linkable_dimensions @@ -646,7 +646,7 @@ def list_dimensions(self) -> List[Dimension]: # noqa: D102 def entities_for_metrics(self, metric_names: List[str]) -> List[Entity]: # noqa: D102 path_key_to_linkable_entities = ( self._semantic_manifest_lookup.metric_lookup.linkable_elements_for_metrics( - metric_references=[MetricReference(element_name=mname) for mname in metric_names], + metric_references=tuple(MetricReference(element_name=mname) for mname in metric_names), with_any_property=frozenset( { LinkableElementProperty.ENTITY, From 42296f725c5d10821c7a6ce9734a0fbb33c06283 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 23 May 2024 15:33:25 -0700 Subject: [PATCH 2/4] Remove duplicate frozenset conversion --- .../metricflow_semantics/model/semantics/metric_lookup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index 5311529fda..213deaaad9 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -53,8 +53,8 @@ def linkable_elements_for_measure( without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> LinkableElementSet: """Return the set of linkable elements reachable from a given measure.""" - frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of) - frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of) + frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of + frozen_without_any_of = frozenset() if without_any_of is None else without_any_of start_time = time.time() linkable_element_set = self._linkable_spec_resolver.get_linkable_element_set_for_measure( @@ -75,8 +75,8 @@ def linkable_elements_for_no_metrics_query( without_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, ) -> LinkableElementSet: """Return the reachable linkable elements for a dimension values query with no metrics.""" - frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else frozenset(with_any_of) - frozen_without_any_of = frozenset() if without_any_of is None else frozenset(without_any_of) + frozen_with_any_of = LinkableElementProperty.all_properties() if with_any_of is None else with_any_of + frozen_without_any_of = frozenset() if without_any_of is None else without_any_of return self._linkable_spec_resolver.get_linkable_elements_for_distinct_values_query( with_any_of=frozen_with_any_of, From 2afbfc232cecb29cbe05879eddb8b18f0252bc44 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 23 May 2024 15:51:59 -0700 Subject: [PATCH 3/4] Changelog --- .changes/unreleased/Under the Hood-20240523-155148.yaml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20240523-155148.yaml diff --git a/.changes/unreleased/Under the Hood-20240523-155148.yaml b/.changes/unreleased/Under the Hood-20240523-155148.yaml new file mode 100644 index 0000000000..18366aece5 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240523-155148.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Cache functions related to parsing group by options in order to improve query + performance. +time: 2024-05-23T15:51:48.789152-07:00 +custom: + Author: courtneyholcomb + Issue: "1229" From 87ddb5c19c3189882dc5268326c7550ab5d627ba Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Thu, 23 May 2024 15:56:24 -0700 Subject: [PATCH 4/4] Use functools.lru_cache instead of functools.cache functools.cache is basically just a wrapper around functools.lru_cache, but it wasn't added until Python 3.9. Since we support Python 3.8, we need to use lru_cache instead. --- .../metricflow_semantics/model/semantics/metric_lookup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py index 213deaaad9..ca2b01eadb 100644 --- a/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py +++ b/metricflow-semantics/metricflow_semantics/model/semantics/metric_lookup.py @@ -45,7 +45,7 @@ def __init__(self, semantic_manifest: SemanticManifest, semantic_model_lookup: S max_entity_links=MAX_JOIN_HOPS, ) - @functools.cache + @functools.lru_cache def linkable_elements_for_measure( self, measure_reference: MeasureReference, @@ -68,7 +68,7 @@ def linkable_elements_for_measure( return linkable_element_set - @functools.cache + @functools.lru_cache def linkable_elements_for_no_metrics_query( self, with_any_of: Optional[FrozenSet[LinkableElementProperty]] = None, @@ -83,7 +83,7 @@ def linkable_elements_for_no_metrics_query( without_any_of=frozen_without_any_of, ) - @functools.cache + @functools.lru_cache def linkable_elements_for_metrics( self, metric_references: Sequence[MetricReference],