-
Notifications
You must be signed in to change notification settings - Fork 97
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cache building operations in
DataflowPlanBuilder
(#1448)
This PR adds a few LRU caches to handle building operations within the `DataflowPlanBuilder`. Since the same metric may be used multiple times in a derived metric (or across queries), this can yield significant performance improvements. Please review commit by commit, as there were signature / type changes made to keep the changes cleaner.
- Loading branch information
Showing
12 changed files
with
251 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass | ||
from typing import Optional | ||
|
||
from metricflow_semantics.collection_helpers.lru_cache import LruCache | ||
from metricflow_semantics.specs.linkable_spec_set import LinkableSpecSet | ||
from metricflow_semantics.specs.metric_spec import MetricSpec | ||
from metricflow_semantics.specs.where_filter.where_filter_transform import WhereSpecFactory | ||
|
||
from metricflow.dataflow.builder.measure_spec_properties import MeasureSpecProperties | ||
from metricflow.dataflow.builder.source_node_recipe import SourceNodeRecipe | ||
from metricflow.dataflow.dataflow_plan import DataflowPlanNode | ||
from metricflow.plan_conversion.node_processor import PredicatePushdownState | ||
|
||
|
||
@dataclass(frozen=True)
class FindSourceNodeRecipeParameterSet:
    """Parameters for `DataflowPlanBuilder._find_source_node_recipe()`.

    Instances are frozen so that they compare and hash by field value, which lets them serve as lookup keys for
    cached results of that method.
    """

    # Linkable specs passed to the recipe search.
    linkable_spec_set: LinkableSpecSet
    # Predicate pushdown state passed to the recipe search.
    predicate_pushdown_state: PredicatePushdownState
    # Measure properties passed to the recipe search; may be None.
    measure_spec_properties: Optional[MeasureSpecProperties]
|
||
|
||
@dataclass(frozen=True)
class FindSourceNodeRecipeResult:
    """Result for `DataflowPlanBuilder._find_source_node_recipe()`.

    The recipe is wrapped in this object (rather than stored directly) so that a stored result whose recipe is
    `None` can be told apart from a missing result when a lookup itself returns an `Optional`.
    """

    # The recipe that was found; may be None.
    source_node_recipe: Optional[SourceNodeRecipe]
|
||
|
||
@dataclass(frozen=True)
class BuildAnyMetricOutputNodeParameterSet:
    """Parameters for `DataflowPlanBuilder._build_any_metric_output_node()`.

    Instances are frozen so that they compare and hash by field value, which lets them serve as lookup keys for
    cached results of that method.
    """

    # Metric spec passed to the builder method.
    metric_spec: MetricSpec
    # Queried linkable specs passed to the builder method.
    queried_linkable_specs: LinkableSpecSet
    # Where-filter spec factory passed to the builder method.
    filter_spec_factory: WhereSpecFactory
    # Predicate pushdown state passed to the builder method.
    predicate_pushdown_state: PredicatePushdownState
    # Flag passed to the builder method.
    for_group_by_source_node: bool
|
||
|
||
class DataflowPlanBuilderCache:
    """Cache for internal methods in `DataflowPlanBuilder`.

    Holds two independent `LruCache` instances: one keyed by `FindSourceNodeRecipeParameterSet` and one keyed by
    `BuildAnyMetricOutputNodeParameterSet`.
    """

    def __init__(
        self, find_source_node_recipe_cache_size: int = 1000, build_any_metric_output_node_cache_size: int = 1000
    ) -> None:
        """Create both caches with the given sizes.

        Args:
            find_source_node_recipe_cache_size: Size passed to the `LruCache` for source-node-recipe results.
            build_any_metric_output_node_cache_size: Size passed to the `LruCache` for metric-output-node results.

        Raises:
            ValueError: If either size is not greater than zero.
        """
        # Validate before constructing anything, and with an explicit exception so the check is not stripped when
        # running with `python -O`.
        if find_source_node_recipe_cache_size <= 0:
            raise ValueError(
                f"find_source_node_recipe_cache_size must be > 0, got {find_source_node_recipe_cache_size}"
            )
        if build_any_metric_output_node_cache_size <= 0:
            raise ValueError(
                f"build_any_metric_output_node_cache_size must be > 0, got {build_any_metric_output_node_cache_size}"
            )

        self._find_source_node_recipe_cache = LruCache[FindSourceNodeRecipeParameterSet, FindSourceNodeRecipeResult](
            find_source_node_recipe_cache_size
        )
        self._build_any_metric_output_node_cache = LruCache[BuildAnyMetricOutputNodeParameterSet, DataflowPlanNode](
            build_any_metric_output_node_cache_size
        )

    def get_find_source_node_recipe_result(
        self, parameter_set: FindSourceNodeRecipeParameterSet
    ) -> Optional[FindSourceNodeRecipeResult]:
        """Return whatever the source-node-recipe cache's `get()` yields for `parameter_set`."""
        return self._find_source_node_recipe_cache.get(parameter_set)

    def set_find_source_node_recipe_result(
        self, parameter_set: FindSourceNodeRecipeParameterSet, source_node_recipe: FindSourceNodeRecipeResult
    ) -> None:
        """Store `source_node_recipe` in the source-node-recipe cache under `parameter_set`."""
        self._find_source_node_recipe_cache.set(parameter_set, source_node_recipe)

    def get_build_any_metric_output_node_result(
        self, parameter_set: BuildAnyMetricOutputNodeParameterSet
    ) -> Optional[DataflowPlanNode]:
        """Return whatever the metric-output-node cache's `get()` yields for `parameter_set`."""
        return self._build_any_metric_output_node_cache.get(parameter_set)

    def set_build_any_metric_output_node_result(
        self, parameter_set: BuildAnyMetricOutputNodeParameterSet, dataflow_plan_node: DataflowPlanNode
    ) -> None:
        """Store `dataflow_plan_node` in the metric-output-node cache under `parameter_set`."""
        self._build_any_metric_output_node_cache.set(parameter_set, dataflow_plan_node)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass | ||
from typing import Optional, Sequence | ||
|
||
from dbt_semantic_interfaces.references import TimeDimensionReference | ||
from metricflow_semantics.specs.measure_spec import MeasureSpec | ||
from metricflow_semantics.specs.non_additive_dimension_spec import NonAdditiveDimensionSpec | ||
|
||
|
||
@dataclass(frozen=True)
class MeasureSpecProperties:
    """Input dataclass for grouping properties of a sequence of MeasureSpecs.

    The generated `__hash__` hashes every field, so `measure_specs` is normalized to a tuple on construction. This
    keeps the instance hashable even when a caller passes a list.
    """

    # The measure specs being grouped; always stored as a tuple after construction.
    measure_specs: Sequence[MeasureSpec]
    # Name of the semantic model associated with the measures.
    semantic_model_name: str
    # Reference to the aggregation time dimension.
    agg_time_dimension: TimeDimensionReference
    # Optional non-additive dimension spec; defaults to None.
    non_additive_dimension_spec: Optional[NonAdditiveDimensionSpec] = None

    def __post_init__(self) -> None:
        """Normalize `measure_specs` to a tuple so the instance is hashable."""
        if not isinstance(self.measure_specs, tuple):
            # `frozen=True` blocks ordinary attribute assignment, so go through `object.__setattr__`.
            object.__setattr__(self, "measure_specs", tuple(self.measure_specs))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass | ||
from typing import List, Tuple | ||
|
||
from metricflow_semantics.specs.instance_spec import LinkableInstanceSpec | ||
|
||
from metricflow.dataflow.builder.node_evaluator import JoinLinkableInstancesRecipe | ||
from metricflow.dataflow.dataflow_plan import DataflowPlanNode | ||
from metricflow.dataflow.nodes.join_to_base import JoinDescription | ||
|
||
|
||
@dataclass(frozen=True)
class SourceNodeRecipe:
    """Recipe for how to build a dataflow plan node that outputs measures and linkable instances as needed."""

    # The node that the joins are applied to.
    source_node: DataflowPlanNode
    # Linkable specs required locally (i.e. not supplied through `join_linkable_instances_recipes`) -- TODO confirm.
    required_local_linkable_specs: Tuple[LinkableInstanceSpec, ...]
    # One recipe per join; each one exposes a `join_description`.
    join_linkable_instances_recipes: Tuple[JoinLinkableInstancesRecipe, ...]

    @property
    def join_targets(self) -> List[JoinDescription]:
        """Joins to be made to source node.

        Returns:
            A new list holding the `join_description` of each entry in `join_linkable_instances_recipes`, in the
            same order.
        """
        return [join_recipe.join_description for join_recipe in self.join_linkable_instances_recipes]
Oops, something went wrong.